Merge branch 'afs-dh' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 12 Apr 2018 18:59:06 +0000 (11:59 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 12 Apr 2018 18:59:06 +0000 (11:59 -0700)
Pull AFS updates from Al Viro:
 "The AFS series posted by dhowells depended upon lookup_one_len()
  rework; now that prereq is in the mainline, that series had been
  rebased on top of it and got some exposure and testing..."

* 'afs-dh' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  afs: Do better accretion of small writes on newly created content
  afs: Add stats for data transfer operations
  afs: Trace protocol errors
  afs: Locally edit directory data for mkdir/create/unlink/...
  afs: Adjust the directory XDR structures
  afs: Split the directory content defs into a header
  afs: Fix directory handling
  afs: Split the dynroot stuff out and give it its own ops tables
  afs: Keep track of invalid-before version for dentry coherency
  afs: Rearrange status mapping
  afs: Make it possible to get the data version in readpage
  afs: Init inode before accessing cache
  afs: Introduce a statistics proc file
  afs: Dump bad status record
  afs: Implement @cell substitution handling
  afs: Implement @sys substitution handling
  afs: Prospectively look up extra files when doing a single lookup
  afs: Don't over-increment the cell usage count when pinning it
  afs: Fix checker warnings
  vfs: Remove the const from dir_context::actor

876 files changed:
.clang-format [new file with mode: 0644]
.gitignore
Documentation/ABI/testing/sysfs-class-rtc
Documentation/cgroup-v1/memory.txt
Documentation/cpu-freq/core.txt
Documentation/cpu-freq/cpu-drivers.txt
Documentation/cpuidle/sysfs.txt
Documentation/devicetree/bindings/dma/mtk-hsdma.txt [new file with mode: 0644]
Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt [new file with mode: 0644]
Documentation/devicetree/bindings/dma/stm32-dma.txt
Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
Documentation/devicetree/bindings/mips/mscc.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
Documentation/devicetree/bindings/pmem/pmem-region.txt [new file with mode: 0644]
Documentation/devicetree/bindings/rtc/isil,isl12026.txt [new file with mode: 0644]
Documentation/devicetree/bindings/vendor-prefixes.txt
Documentation/filesystems/ceph.txt
Documentation/hwmon/adm1275
Documentation/hwmon/lm92
Documentation/hwmon/nct6775
Documentation/hwmon/sht21
Documentation/hwmon/sht3x
Documentation/media/kapi/v4l2-dev.rst
Documentation/media/uapi/mediactl/media-ioc-enum-entities.rst
Documentation/media/uapi/mediactl/media-ioc-g-topology.rst
Documentation/media/uapi/mediactl/media-types.rst
Documentation/media/uapi/v4l/extended-controls.rst
Documentation/media/uapi/v4l/pixfmt-v4l2-mplane.rst
Documentation/media/uapi/v4l/pixfmt-v4l2.rst
Documentation/process/4.Coding.rst
Documentation/process/clang-format.rst [new file with mode: 0644]
Documentation/process/coding-style.rst
Documentation/sysctl/kernel.txt
Documentation/sysctl/vm.txt
Documentation/trace/events.rst
Documentation/trace/ftrace.rst
Documentation/trace/histogram.txt [new file with mode: 0644]
Documentation/vm/hmm.txt
Documentation/vm/page_migration
MAINTAINERS
arch/alpha/include/uapi/asm/mman.h
arch/arm/boot/dts/ls1021a.dtsi
arch/arm/include/asm/cacheflush.h
arch/arm/include/asm/memory.h
arch/arm/mach-npcm/npcm7xx.c
arch/arm/mm/dma-mapping.c
arch/arm/mm/mmap.c
arch/arm64/include/asm/cacheflush.h
arch/arm64/include/asm/memory.h
arch/arm64/mm/mmap.c
arch/c6x/Makefile
arch/c6x/kernel/asm-offsets.c
arch/c6x/platforms/plldata.c
arch/microblaze/include/asm/pci.h
arch/microblaze/include/asm/pgtable.h
arch/microblaze/pci/pci-common.c
arch/mips/Kconfig
arch/mips/Makefile
arch/mips/alchemy/board-gpr.c
arch/mips/alchemy/board-mtx1.c
arch/mips/ar7/platform.c
arch/mips/bcm47xx/buttons.c
arch/mips/bcm47xx/leds.c
arch/mips/boot/dts/Makefile
arch/mips/boot/dts/brcm/bcm7125.dtsi
arch/mips/boot/dts/brcm/bcm7346.dtsi
arch/mips/boot/dts/brcm/bcm7358.dtsi
arch/mips/boot/dts/brcm/bcm7360.dtsi
arch/mips/boot/dts/brcm/bcm7362.dtsi
arch/mips/boot/dts/brcm/bcm7420.dtsi
arch/mips/boot/dts/brcm/bcm7425.dtsi
arch/mips/boot/dts/brcm/bcm7435.dtsi
arch/mips/boot/dts/brcm/bcm97125cbmb.dts
arch/mips/boot/dts/brcm/bcm97346dbsmb.dts
arch/mips/boot/dts/brcm/bcm97358svmb.dts
arch/mips/boot/dts/brcm/bcm97360svmb.dts
arch/mips/boot/dts/brcm/bcm97362svmb.dts
arch/mips/boot/dts/brcm/bcm97420c.dts
arch/mips/boot/dts/brcm/bcm97425svmb.dts
arch/mips/boot/dts/brcm/bcm97435svmb.dts
arch/mips/boot/dts/img/boston.dts
arch/mips/boot/dts/ingenic/ci20.dts
arch/mips/boot/dts/mscc/Makefile [new file with mode: 0644]
arch/mips/boot/dts/mscc/ocelot.dtsi [new file with mode: 0644]
arch/mips/boot/dts/mscc/ocelot_pcb123.dts [new file with mode: 0644]
arch/mips/cavium-octeon/octeon-irq.c
arch/mips/configs/bmips_stb_defconfig
arch/mips/configs/generic/32r6.config
arch/mips/configs/generic/64r6.config
arch/mips/configs/generic/board-ocelot.config [new file with mode: 0644]
arch/mips/crypto/Makefile [new file with mode: 0644]
arch/mips/crypto/crc32-mips.c [new file with mode: 0644]
arch/mips/generic/Kconfig
arch/mips/generic/Makefile
arch/mips/generic/board-ocelot.c [new file with mode: 0644]
arch/mips/include/asm/cpu-features.h
arch/mips/include/asm/isa-rev.h [new file with mode: 0644]
arch/mips/include/asm/mach-ath79/ar71xx_regs.h
arch/mips/include/asm/mipsregs.h
arch/mips/include/uapi/asm/hwcap.h
arch/mips/include/uapi/asm/mman.h
arch/mips/kernel/cpu-probe.c
arch/mips/kernel/pm-cps.c
arch/mips/kernel/reset.c
arch/mips/kernel/setup.c
arch/mips/mm/init.c
arch/mips/mm/mmap.c
arch/mips/net/bpf_jit_asm.S
arch/mips/pci/pci-mt7620.c
arch/mips/txx9/rbtx4927/setup.c
arch/mips/vdso/elf.S
arch/nds32/include/asm/cacheflush.h
arch/nios2/include/asm/cacheflush.h
arch/nios2/kernel/time.c
arch/parisc/include/asm/cacheflush.h
arch/parisc/include/uapi/asm/mman.h
arch/parisc/kernel/sys_parisc.c
arch/parisc/kernel/time.c
arch/powerpc/mm/mmap.c
arch/powerpc/mm/mmu_context_iommu.c
arch/powerpc/platforms/powernv/opal.c
arch/s390/mm/mmap.c
arch/sparc/kernel/sys_sparc_64.c
arch/um/Kconfig.net
arch/um/drivers/Makefile
arch/um/drivers/chan_kern.c
arch/um/drivers/line.c
arch/um/drivers/net_kern.c
arch/um/drivers/random.c
arch/um/drivers/ubd_kern.c
arch/um/drivers/vector_kern.c [new file with mode: 0644]
arch/um/drivers/vector_kern.h [new file with mode: 0644]
arch/um/drivers/vector_transports.c [new file with mode: 0644]
arch/um/drivers/vector_user.c [new file with mode: 0644]
arch/um/drivers/vector_user.h [new file with mode: 0644]
arch/um/include/asm/asm-prototypes.h [new file with mode: 0644]
arch/um/include/asm/irq.h
arch/um/include/shared/irq_user.h
arch/um/include/shared/net_kern.h
arch/um/include/shared/os.h
arch/um/kernel/irq.c
arch/um/kernel/time.c
arch/um/os-Linux/file.c
arch/um/os-Linux/irq.c
arch/um/os-Linux/signal.c
arch/unicore32/include/asm/cacheflush.h
arch/unicore32/include/asm/memory.h
arch/x86/include/asm/x86_init.h
arch/x86/kernel/x86_init.c
arch/x86/mm/init_32.c
arch/x86/mm/init_64.c
arch/x86/mm/mmap.c
arch/x86/um/stub_segv.c
arch/x86/xen/enlighten_pv.c
arch/x86/xen/mmu_pv.c
arch/x86/xen/smp_pv.c
arch/x86/xen/xen-head.S
arch/xtensa/include/uapi/asm/mman.h
crypto/Kconfig
crypto/af_alg.c
drivers/acpi/arm64/iort.c
drivers/acpi/nfit/core.c
drivers/acpi/nfit/mce.c
drivers/acpi/nfit/nfit.h
drivers/acpi/processor_perflib.c
drivers/base/memory.c
drivers/block/rbd.c
drivers/char/rtc.c
drivers/cpufreq/armada-37xx-cpufreq.c
drivers/cpufreq/cppc_cpufreq.c
drivers/cpufreq/freq_table.c
drivers/cpufreq/intel_pstate.c
drivers/cpufreq/scmi-cpufreq.c
drivers/cpufreq/ti-cpufreq.c
drivers/cpuidle/cpuidle.c
drivers/cpuidle/governors/ladder.c
drivers/cpuidle/governors/menu.c
drivers/dax/Kconfig
drivers/dax/device.c
drivers/dax/pmem.c
drivers/dax/super.c
drivers/dma/Kconfig
drivers/dma/Makefile
drivers/dma/at_xdmac.c
drivers/dma/dmatest.c
drivers/dma/dw-axi-dmac/Makefile [new file with mode: 0644]
drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c [new file with mode: 0644]
drivers/dma/dw-axi-dmac/dw-axi-dmac.h [new file with mode: 0644]
drivers/dma/edma.c
drivers/dma/imx-sdma.c
drivers/dma/mediatek/Kconfig [new file with mode: 0644]
drivers/dma/mediatek/Makefile [new file with mode: 0644]
drivers/dma/mediatek/mtk-hsdma.c [new file with mode: 0644]
drivers/dma/pl330.c
drivers/dma/qcom/bam_dma.c
drivers/dma/sh/rcar-dmac.c
drivers/dma/stm32-dma.c
drivers/firmware/broadcom/Kconfig
drivers/firmware/broadcom/bcm47xx_sprom.c
drivers/gpu/drm/msm/adreno/a5xx_gpu.c
drivers/hwmon/Kconfig
drivers/hwmon/g762.c
drivers/hwmon/lm92.c
drivers/hwmon/nct6775.c
drivers/hwmon/pmbus/Kconfig
drivers/hwmon/pmbus/adm1275.c
drivers/hwmon/pmbus/max8688.c
drivers/hwmon/pmbus/ucd9000.c
drivers/hwmon/sht21.c
drivers/hwmon/via-cputemp.c
drivers/iommu/amd_iommu.c
drivers/iommu/amd_iommu_init.c
drivers/iommu/amd_iommu_types.h
drivers/iommu/arm-smmu-v3.c
drivers/iommu/dma-iommu.c
drivers/iommu/dmar.c
drivers/iommu/exynos-iommu.c
drivers/iommu/intel-iommu.c
drivers/iommu/intel-svm.c
drivers/iommu/io-pgtable-arm-v7s.c
drivers/iommu/io-pgtable-arm.c
drivers/iommu/io-pgtable.h
drivers/iommu/iommu.c
drivers/iommu/mtk_iommu.c
drivers/iommu/mtk_iommu.h
drivers/iommu/mtk_iommu_v1.c
drivers/iommu/omap-iommu.c
drivers/iommu/rockchip-iommu.c
drivers/irqchip/irq-gic-v3-its.c
drivers/md/Kconfig
drivers/md/dm-linear.c
drivers/md/dm-log-writes.c
drivers/md/dm-stripe.c
drivers/md/dm.c
drivers/media/cec/cec-pin.c
drivers/media/common/v4l2-tpg/v4l2-tpg-core.c
drivers/media/dvb-core/dvb_frontend.c
drivers/media/i2c/adv748x/adv748x-afe.c
drivers/media/i2c/dw9714.c
drivers/media/i2c/imx274.c
drivers/media/i2c/ov13858.c
drivers/media/i2c/ov2685.c
drivers/media/i2c/ov5640.c
drivers/media/i2c/ov5645.c
drivers/media/i2c/ov5670.c
drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c
drivers/media/platform/qcom/venus/firmware.c
drivers/media/platform/qcom/venus/vdec.c
drivers/media/platform/qcom/venus/venc.c
drivers/media/platform/vivid/vivid-vid-cap.c
drivers/media/platform/vsp1/vsp1_wpf.c
drivers/media/tuners/r820t.c
drivers/media/usb/cx231xx/cx231xx-dvb.c
drivers/media/usb/gspca/Kconfig
drivers/media/v4l2-core/v4l2-compat-ioctl32.c
drivers/media/v4l2-core/v4l2-dev.c
drivers/misc/kgdbts.c
drivers/mmc/core/block.c
drivers/mmc/host/jz4740_mmc.c
drivers/mmc/host/tmio_mmc_core.c
drivers/mtd/ubi/block.c
drivers/mtd/ubi/build.c
drivers/mtd/ubi/fastmap-wl.c
drivers/net/ethernet/aquantia/atlantic/aq_nic.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
drivers/net/ethernet/cavium/thunder/nic.h
drivers/net/ethernet/cavium/thunder/nicvf_main.c
drivers/net/ethernet/freescale/fsl_pq_mdio.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/ibm/ibmvnic.h
drivers/net/ethernet/intel/ice/ice_common.c
drivers/net/ethernet/intel/ice/ice_ethtool.c
drivers/net/ethernet/marvell/mvpp2.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
drivers/net/ethernet/sfc/mcdi.c
drivers/net/hyperv/netvsc.c
drivers/net/netdevsim/devlink.c
drivers/net/phy/dp83640.c
drivers/net/phy/marvell.c
drivers/net/slip/slhc.c
drivers/net/tun.c
drivers/net/usb/cdc_ether.c
drivers/net/usb/lan78xx.c
drivers/net/wireless/mac80211_hwsim.c
drivers/nvdimm/Kconfig
drivers/nvdimm/Makefile
drivers/nvdimm/btt_devs.c
drivers/nvdimm/bus.c
drivers/nvdimm/claim.c
drivers/nvdimm/core.c
drivers/nvdimm/dax_devs.c
drivers/nvdimm/dimm.c
drivers/nvdimm/dimm_devs.c
drivers/nvdimm/label.c
drivers/nvdimm/label.h
drivers/nvdimm/namespace_devs.c
drivers/nvdimm/nd.h
drivers/nvdimm/of_pmem.c [new file with mode: 0644]
drivers/nvdimm/pfn_devs.c
drivers/nvdimm/pmem.c
drivers/nvdimm/region.c
drivers/nvdimm/region_devs.c
drivers/of/unittest.c
drivers/platform/mellanox/mlxreg-hotplug.c
drivers/platform/x86/Kconfig
drivers/platform/x86/dell-smbios-base.c
drivers/platform/x86/fujitsu-laptop.c
drivers/platform/x86/gpd-pocket-fan.c
drivers/platform/x86/intel-hid.c
drivers/platform/x86/intel_turbo_max_3.c
drivers/platform/x86/mlx-platform.c
drivers/platform/x86/silead_dmi.c
drivers/platform/x86/thinkpad_acpi.c
drivers/platform/x86/topstar-laptop.c
drivers/platform/x86/wmi.c
drivers/rapidio/devices/rio_mport_cdev.c
drivers/rapidio/rio-scan.c
drivers/remoteproc/Kconfig
drivers/remoteproc/Makefile
drivers/remoteproc/imx_rproc.c
drivers/remoteproc/qcom_adsp_pil.c
drivers/remoteproc/qcom_common.c
drivers/remoteproc/qcom_common.h
drivers/remoteproc/qcom_q6v5_pil.c
drivers/remoteproc/qcom_sysmon.c [new file with mode: 0644]
drivers/remoteproc/qcom_wcnss.c
drivers/remoteproc/remoteproc_core.c
drivers/remoteproc/remoteproc_internal.h
drivers/rpmsg/qcom_glink_native.c
drivers/rpmsg/qcom_glink_smem.c
drivers/rpmsg/qcom_smd.c
drivers/rpmsg/rpmsg_core.c
drivers/rtc/Kconfig
drivers/rtc/Makefile
drivers/rtc/class.c
drivers/rtc/hctosys.c
drivers/rtc/interface.c
drivers/rtc/nvmem.c
drivers/rtc/rtc-88pm80x.c
drivers/rtc/rtc-88pm860x.c
drivers/rtc/rtc-ab-b5ze-s3.c
drivers/rtc/rtc-ab3100.c
drivers/rtc/rtc-ab8500.c
drivers/rtc/rtc-abx80x.c
drivers/rtc/rtc-ac100.c
drivers/rtc/rtc-at91sam9.c
drivers/rtc/rtc-au1xxx.c
drivers/rtc/rtc-bq32k.c
drivers/rtc/rtc-brcmstb-waketimer.c
drivers/rtc/rtc-cmos.c
drivers/rtc/rtc-coh901331.c
drivers/rtc/rtc-core.h
drivers/rtc/rtc-cpcap.c
drivers/rtc/rtc-cros-ec.c
drivers/rtc/rtc-da9052.c
drivers/rtc/rtc-da9055.c
drivers/rtc/rtc-da9063.c
drivers/rtc/rtc-ds1216.c
drivers/rtc/rtc-ds1286.c
drivers/rtc/rtc-ds1302.c
drivers/rtc/rtc-ds1305.c
drivers/rtc/rtc-ds1307.c
drivers/rtc/rtc-ds1343.c
drivers/rtc/rtc-ds1347.c
drivers/rtc/rtc-ds1390.c
drivers/rtc/rtc-ds1511.c
drivers/rtc/rtc-ds1553.c
drivers/rtc/rtc-ds1685.c
drivers/rtc/rtc-ds1742.c
drivers/rtc/rtc-ds2404.c
drivers/rtc/rtc-ds3232.c
drivers/rtc/rtc-efi.c
drivers/rtc/rtc-fm3130.c
drivers/rtc/rtc-goldfish.c
drivers/rtc/rtc-isl12022.c
drivers/rtc/rtc-isl12026.c [new file with mode: 0644]
drivers/rtc/rtc-isl1208.c
drivers/rtc/rtc-jz4740.c
drivers/rtc/rtc-lib.c
drivers/rtc/rtc-lpc24xx.c
drivers/rtc/rtc-lpc32xx.c
drivers/rtc/rtc-ls1x.c
drivers/rtc/rtc-m41t80.c
drivers/rtc/rtc-m41t93.c
drivers/rtc/rtc-m41t94.c
drivers/rtc/rtc-m48t35.c
drivers/rtc/rtc-m48t59.c
drivers/rtc/rtc-m48t86.c
drivers/rtc/rtc-max6900.c
drivers/rtc/rtc-max6902.c
drivers/rtc/rtc-max6916.c
drivers/rtc/rtc-max77686.c
drivers/rtc/rtc-max8997.c
drivers/rtc/rtc-max8998.c
drivers/rtc/rtc-mc13xxx.c
drivers/rtc/rtc-mcp795.c
drivers/rtc/rtc-mpc5121.c
drivers/rtc/rtc-mrst.c
drivers/rtc/rtc-msm6242.c
drivers/rtc/rtc-mt7622.c
drivers/rtc/rtc-mv.c
drivers/rtc/rtc-mxc_v2.c
drivers/rtc/rtc-nuc900.c
drivers/rtc/rtc-omap.c
drivers/rtc/rtc-pcap.c
drivers/rtc/rtc-pcf2123.c
drivers/rtc/rtc-pcf2127.c
drivers/rtc/rtc-pcf50633.c
drivers/rtc/rtc-pcf85063.c
drivers/rtc/rtc-pcf8523.c
drivers/rtc/rtc-pcf85363.c
drivers/rtc/rtc-pic32.c
drivers/rtc/rtc-pm8xxx.c
drivers/rtc/rtc-ps3.c
drivers/rtc/rtc-r7301.c
drivers/rtc/rtc-r9701.c
drivers/rtc/rtc-rk808.c
drivers/rtc/rtc-rp5c01.c
drivers/rtc/rtc-rs5c348.c
drivers/rtc/rtc-rs5c372.c
drivers/rtc/rtc-rv8803.c
drivers/rtc/rtc-rx4581.c
drivers/rtc/rtc-rx6110.c
drivers/rtc/rtc-rx8010.c
drivers/rtc/rtc-rx8025.c
drivers/rtc/rtc-rx8581.c
drivers/rtc/rtc-s35390a.c
drivers/rtc/rtc-s3c.c
drivers/rtc/rtc-s5m.c
drivers/rtc/rtc-sc27xx.c
drivers/rtc/rtc-sh.c
drivers/rtc/rtc-sirfsoc.c
drivers/rtc/rtc-snvs.c
drivers/rtc/rtc-spear.c
drivers/rtc/rtc-st-lpc.c
drivers/rtc/rtc-starfire.c
drivers/rtc/rtc-stk17ta8.c
drivers/rtc/rtc-sun6i.c
drivers/rtc/rtc-sunxi.c
drivers/rtc/rtc-sysfs.c
drivers/rtc/rtc-tegra.c
drivers/rtc/rtc-tps6586x.c
drivers/rtc/rtc-tx4939.c
drivers/rtc/rtc-wm831x.c
drivers/rtc/rtc-xgene.c
drivers/rtc/rtc-zynqmp.c
drivers/rtc/systohc.c
drivers/s390/block/Kconfig
drivers/soc/qcom/Kconfig
drivers/soc/qcom/mdt_loader.c
drivers/staging/lustre/lustre/llite/glimpse.c
drivers/staging/lustre/lustre/mdc/mdc_request.c
drivers/staging/media/atomisp/i2c/atomisp-gc0310.c
drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c
drivers/staging/media/atomisp/i2c/atomisp-ov2680.c
drivers/staging/media/atomisp/i2c/atomisp-ov2722.c
drivers/staging/media/atomisp/i2c/gc0310.h
drivers/staging/media/atomisp/i2c/ov2722.h
drivers/staging/media/atomisp/i2c/ov5693/atomisp-ov5693.c
drivers/staging/media/atomisp/i2c/ov5693/ov5693.h
drivers/staging/media/atomisp/include/linux/atomisp_platform.h
drivers/staging/media/atomisp/pci/atomisp2/Makefile
drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.c
drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.h
drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat.h
drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.c
drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.h
drivers/staging/media/atomisp/pci/atomisp2/atomisp_drvfs.c
drivers/staging/media/atomisp/pci/atomisp2/atomisp_fops.c
drivers/staging/media/atomisp/pci/atomisp2/atomisp_ioctl.c
drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.c
drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/interface/ia_css_util.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/src/util.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/system_global.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/debug.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/gp_timer.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_formatter.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system_local.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system_private.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/system_global.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/input_system_public.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_input_port.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_irq.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_metadata.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_mipi.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_format.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_public.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm.host.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.c [deleted file]
drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.h [deleted file]
drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2.host.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.c [deleted file]
drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.h [deleted file]
drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8.host.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.c [deleted file]
drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.h [deleted file]
drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/output/output_1.0/ia_css_output.host.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw.host.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw_types.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.c [deleted file]
drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.h [deleted file]
drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/vf/vf_1.0/ia_css_vf.host.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/interface/ia_css_binary.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/src/binary.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/bufq/src/bufq.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/ifmtr/src/ifmtr.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/interface/ia_css_inputfifo.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/src/inputfifo.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/interface/ia_css_isys.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/csi_rx_rmgr.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_init.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/rx.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/virtual_isys.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/pipeline/src/pipeline.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/rmgr/src/rmgr_vbuf.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_mipi.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_params.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.h
drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_stream_format.c
drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_stream_format.h
drivers/staging/media/atomisp/pci/atomisp2/include/mmu/isp_mmu.h
drivers/staging/media/atomisp/pci/atomisp2/mmu/isp_mmu.c
drivers/staging/media/atomisp/pci/atomisp2/mmu/sh_mmu_mrfld.c
drivers/staging/media/atomisp/platform/intel-mid/atomisp_gmin_platform.c
drivers/staging/media/davinci_vpfe/dm365_resizer.c
drivers/staging/media/imx/imx-media-csi.c
drivers/vhost/net.c
drivers/vhost/vhost.c
drivers/vhost/vhost.h
drivers/video/Kconfig
drivers/video/console/sticore.c
drivers/video/fbdev/Kconfig
drivers/video/fbdev/amba-clcd.c
drivers/video/fbdev/atmel_lcdfb.c
drivers/video/fbdev/aty/aty128fb.c
drivers/video/fbdev/aty/mach64_ct.c
drivers/video/fbdev/aty/radeon_base.c
drivers/video/fbdev/au1100fb.c
drivers/video/fbdev/fsl-diu-fb.c
drivers/video/fbdev/matrox/matroxfb_crtc2.c
drivers/video/fbdev/offb.c
drivers/video/fbdev/s3c-fb.c
drivers/video/fbdev/sis/init.h
drivers/video/fbdev/sis/init301.c
drivers/video/fbdev/sis/init301.h
drivers/video/fbdev/sis/sis.h
drivers/video/fbdev/sis/sis_main.c
drivers/video/fbdev/sis/sis_main.h
drivers/video/fbdev/smscufx.c
drivers/video/fbdev/ssd1307fb.c
drivers/video/fbdev/stifb.c
drivers/video/fbdev/udlfb.c
drivers/video/fbdev/vermilion/vermilion.c
drivers/video/fbdev/via/via_aux_sii164.c
drivers/video/fbdev/via/via_aux_vt1631.c
drivers/video/fbdev/via/via_aux_vt1632.c
drivers/video/fbdev/via/via_aux_vt1636.c
drivers/video/of_display_timing.c
drivers/virtio/virtio_balloon.c
drivers/xen/xen-acpi-processor.c
drivers/xen/xenbus/xenbus_dev_frontend.c
drivers/xen/xenbus/xenbus_xs.c
fs/afs/write.c
fs/autofs4/waitq.c
fs/binfmt_aout.c
fs/binfmt_elf.c
fs/binfmt_elf_fdpic.c
fs/binfmt_flat.c
fs/block_dev.c
fs/btrfs/compression.c
fs/btrfs/extent_io.c
fs/buffer.c
fs/ceph/Makefile
fs/ceph/addr.c
fs/ceph/cache.c
fs/ceph/caps.c
fs/ceph/debugfs.c
fs/ceph/dir.c
fs/ceph/file.c
fs/ceph/inode.c
fs/ceph/ioctl.c
fs/ceph/locks.c
fs/ceph/mds_client.c
fs/ceph/mds_client.h
fs/ceph/quota.c [new file with mode: 0644]
fs/ceph/snap.c
fs/ceph/super.c
fs/ceph/super.h
fs/ceph/xattr.c
fs/cifs/file.c
fs/dax.c
fs/dcache.c
fs/exec.c
fs/ext2/ext2.h
fs/ext2/inode.c
fs/ext2/namei.c
fs/ext4/inode.c
fs/f2fs/data.c
fs/f2fs/dir.c
fs/f2fs/gc.c
fs/f2fs/inline.c
fs/f2fs/node.c
fs/fs-writeback.c
fs/fscache/cookie.c
fs/fscache/object.c
fs/inode.c
fs/libfs.c
fs/nilfs2/btnode.c
fs/nilfs2/page.c
fs/proc/array.c
fs/proc/base.c
fs/proc/cmdline.c
fs/proc/generic.c
fs/proc/inode.c
fs/proc/internal.h
fs/proc/meminfo.c
fs/proc/proc_net.c
fs/proc/proc_sysctl.c
fs/proc/root.c
fs/proc/task_mmu.c
fs/reiserfs/journal.c
fs/seq_file.c
fs/ubifs/find.c
fs/ubifs/lprops.c
fs/ubifs/scan.c
fs/ubifs/super.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_aops.h
fs/xfs/xfs_iops.c
include/acpi/processor.h
include/asm-generic/io.h
include/linux/acpi_iort.h
include/linux/backing-dev.h
include/linux/binfmts.h
include/linux/ceph/ceph_features.h
include/linux/ceph/ceph_fs.h
include/linux/ceph/libceph.h
include/linux/ceph/messenger.h
include/linux/ceph/osd_client.h
include/linux/ceph/osdmap.h
include/linux/ceph/striper.h [new file with mode: 0644]
include/linux/compiler-clang.h
include/linux/compiler-gcc.h
include/linux/const.h [new file with mode: 0644]
include/linux/cpufreq.h
include/linux/cpuidle.h
include/linux/dax.h
include/linux/dmaengine.h
include/linux/fs.h
include/linux/hmm.h
include/linux/hrtimer.h
include/linux/idr.h
include/linux/intel-iommu.h
include/linux/iommu.h
include/linux/jiffies.h
include/linux/kernel.h
include/linux/kfifo.h
include/linux/libnvdimm.h
include/linux/memcontrol.h
include/linux/memory_hotplug.h
include/linux/mfd/samsung/rtc.h
include/linux/migrate.h
include/linux/mm.h
include/linux/mmzone.h
include/linux/nd.h
include/linux/page-isolation.h
include/linux/pagemap.h
include/linux/platform_data/mlxreg.h
include/linux/radix-tree.h
include/linux/remoteproc.h
include/linux/ring_buffer.h
include/linux/rtc.h
include/linux/sched/mm.h
include/linux/seq_file.h
include/linux/soc/qcom/mdt_loader.h
include/linux/tick.h
include/linux/timekeeping.h
include/linux/trace_events.h
include/linux/utsname.h
include/linux/vmstat.h
include/linux/xarray.h [new file with mode: 0644]
include/media/v4l2-common.h
include/media/v4l2-dev.h
include/net/bluetooth/hci_core.h
include/net/devlink.h
include/net/inet_timewait_sock.h
include/net/nexthop.h
include/net/slhc_vj.h
include/trace/events/initcall.h [new file with mode: 0644]
include/trace/events/rtc.h [new file with mode: 0644]
include/trace/events/vmscan.h
include/uapi/asm-generic/mman-common.h
include/uapi/linux/const.h
include/uapi/linux/msg.h
include/uapi/linux/sem.h
include/uapi/linux/shm.h
include/uapi/linux/virtio_balloon.h
include/video/of_display_timing.h
include/xen/interface/features.h
init/do_mounts_rd.c
init/main.c
ipc/msg.c
ipc/sem.c
ipc/shm.c
ipc/util.c
kernel/bpf/sockmap.c
kernel/bpf/syscall.c
kernel/debug/kdb/kdb_bp.c
kernel/debug/kdb/kdb_main.c
kernel/debug/kdb/kdb_support.c
kernel/panic.c
kernel/params.c
kernel/pid.c
kernel/power/qos.c
kernel/printk/printk.c
kernel/sched/idle.c
kernel/sysctl.c
kernel/time/hrtimer.c
kernel/time/ntp.c
kernel/time/tick-sched.c
kernel/time/tick-sched.h
kernel/time/timekeeping_internal.h
kernel/trace/Kconfig
kernel/trace/ftrace.c
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_clock.c
kernel/trace/trace_events_filter.c
kernel/trace/trace_events_hist.c
kernel/trace/trace_events_trigger.c
kernel/trace/tracing_map.c
kernel/trace/tracing_map.h
kernel/utsname.c
lib/Kconfig.debug
lib/Kconfig.ubsan
lib/Makefile
lib/list_debug.c
lib/radix-tree.c
lib/swiotlb.c
lib/test_bitmap.c
lib/test_kasan.c
lib/test_ubsan.c [new file with mode: 0644]
lib/vsprintf.c
mm/backing-dev.c
mm/cma.c
mm/compaction.c
mm/filemap.c
mm/hmm.c
mm/huge_memory.c
mm/internal.h
mm/khugepaged.c
mm/ksm.c
mm/memcontrol.c
mm/memory-failure.c
mm/memory_hotplug.c
mm/mempolicy.c
mm/migrate.c
mm/mmap.c
mm/mprotect.c
mm/page-writeback.c
mm/page_alloc.c
mm/page_isolation.c
mm/readahead.c
mm/rmap.c
mm/shmem.c
mm/slub.c
mm/swap_state.c
mm/swapfile.c
mm/truncate.c
mm/util.c
mm/vmscan.c
mm/vmstat.c
mm/workingset.c
mm/z3fold.c
net/bluetooth/hci_conn.c
net/bluetooth/hci_event.c
net/bluetooth/l2cap_core.c
net/ceph/Makefile
net/ceph/ceph_common.c
net/ceph/crypto.c
net/ceph/debugfs.c
net/ceph/messenger.c
net/ceph/mon_client.c
net/ceph/osd_client.c
net/ceph/osdmap.c
net/ceph/striper.c [new file with mode: 0644]
net/core/dev.c
net/core/dev_addr_lists.c
net/core/devlink.c
net/core/skbuff.c
net/dccp/ipv4.c
net/dccp/ipv6.c
net/dsa/dsa_priv.h
net/ipv4/arp.c
net/ipv4/inet_timewait_sock.c
net/ipv4/inetpeer.c
net/ipv4/ip_gre.c
net/ipv4/ip_tunnel.c
net/ipv4/route.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_output.c
net/ipv6/ip6_tunnel.c
net/ipv6/ip6_vti.c
net/ipv6/sit.c
net/l2tp/l2tp_core.c
net/l2tp/l2tp_core.h
net/l2tp/l2tp_netlink.c
net/l2tp/l2tp_ppp.c
net/netlink/af_netlink.c
net/rds/send.c
net/sched/act_bpf.c
net/sched/cls_u32.c
net/sctp/ipv6.c
net/sctp/socket.c
net/tipc/diag.c
net/tipc/socket.c
net/tipc/socket.h
samples/Kconfig
samples/Makefile
samples/qmi/Makefile [new file with mode: 0644]
samples/qmi/qmi_sample_client.c [new file with mode: 0644]
scripts/checkpatch.pl
scripts/dtc/include-prefixes/cris [deleted symlink]
scripts/dtc/include-prefixes/metag [deleted symlink]
security/security.c
security/selinux/hooks.c
security/smack/smack_lsm.c
sound/core/oss/pcm_oss.c
sound/core/pcm_native.c
sound/usb/clock.c
tools/include/linux/spinlock.h
tools/testing/ktest/config-bisect.pl [new file with mode: 0755]
tools/testing/ktest/ktest.pl
tools/testing/ktest/sample.conf
tools/testing/nvdimm/test/nfit.c
tools/testing/nvdimm/test/nfit_test.h
tools/testing/radix-tree/linux/gfp.h
tools/testing/selftests/Makefile
tools/testing/selftests/ftrace/test.d/functions
tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc [new file with mode: 0644]
tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc [new file with mode: 0644]
tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc [new file with mode: 0644]
tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc [new file with mode: 0644]
tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc [new file with mode: 0644]
tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc [new file with mode: 0644]
tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc [new file with mode: 0644]
tools/testing/selftests/proc/.gitignore [new file with mode: 0644]
tools/testing/selftests/proc/Makefile [new file with mode: 0644]
tools/testing/selftests/proc/config [new file with mode: 0644]
tools/testing/selftests/proc/proc-loadavg-001.c [new file with mode: 0644]
tools/testing/selftests/proc/proc-self-map-files-001.c [new file with mode: 0644]
tools/testing/selftests/proc/proc-self-map-files-002.c [new file with mode: 0644]
tools/testing/selftests/proc/proc-self-syscall.c [new file with mode: 0644]
tools/testing/selftests/proc/proc-self-wchan.c [new file with mode: 0644]
tools/testing/selftests/proc/proc-uptime-001.c [new file with mode: 0644]
tools/testing/selftests/proc/proc-uptime-002.c [new file with mode: 0644]
tools/testing/selftests/proc/proc-uptime.h [new file with mode: 0644]
tools/testing/selftests/proc/read.c [new file with mode: 0644]

diff --git a/.clang-format b/.clang-format
new file mode 100644 (file)
index 0000000..faffc0d
--- /dev/null
@@ -0,0 +1,428 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# clang-format configuration file. Intended for clang-format >= 4.
+#
+# For more information, see:
+#
+#   Documentation/process/clang-format.rst
+#   https://clang.llvm.org/docs/ClangFormat.html
+#   https://clang.llvm.org/docs/ClangFormatStyleOptions.html
+#
+---
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+#AlignEscapedNewlines: Left # Unknown to clang-format-4.0
+AlignOperands: true
+AlignTrailingComments: false
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: None
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: false
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+  AfterClass: false
+  AfterControlStatement: false
+  AfterEnum: false
+  AfterFunction: true
+  AfterNamespace: true
+  AfterObjCDeclaration: false
+  AfterStruct: false
+  AfterUnion: false
+  #AfterExternBlock: false # Unknown to clang-format-5.0
+  BeforeCatch: false
+  BeforeElse: false
+  IndentBraces: false
+  #SplitEmptyFunction: true # Unknown to clang-format-4.0
+  #SplitEmptyRecord: true # Unknown to clang-format-4.0
+  #SplitEmptyNamespace: true # Unknown to clang-format-4.0
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Custom
+#BreakBeforeInheritanceComma: false # Unknown to clang-format-4.0
+BreakBeforeTernaryOperators: false
+BreakConstructorInitializersBeforeComma: false
+#BreakConstructorInitializers: BeforeComma # Unknown to clang-format-4.0
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: false
+ColumnLimit: 80
+CommentPragmas: '^ IWYU pragma:'
+#CompactNamespaces: false # Unknown to clang-format-4.0
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 8
+ContinuationIndentWidth: 8
+Cpp11BracedListStyle: false
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+#FixNamespaceComments: false # Unknown to clang-format-4.0
+
+# Taken from:
+#   git grep -h '^#define [^[:space:]]*for_each[^[:space:]]*(' include/ \
+#   | sed "s,^#define \([^[:space:]]*for_each[^[:space:]]*\)(.*$,  - '\1'," \
+#   | sort | uniq
+ForEachMacros:
+  - 'apei_estatus_for_each_section'
+  - 'ata_for_each_dev'
+  - 'ata_for_each_link'
+  - 'ax25_for_each'
+  - 'ax25_uid_for_each'
+  - 'bio_for_each_integrity_vec'
+  - '__bio_for_each_segment'
+  - 'bio_for_each_segment'
+  - 'bio_for_each_segment_all'
+  - 'bio_list_for_each'
+  - 'bip_for_each_vec'
+  - 'blkg_for_each_descendant_post'
+  - 'blkg_for_each_descendant_pre'
+  - 'blk_queue_for_each_rl'
+  - 'bond_for_each_slave'
+  - 'bond_for_each_slave_rcu'
+  - 'btree_for_each_safe128'
+  - 'btree_for_each_safe32'
+  - 'btree_for_each_safe64'
+  - 'btree_for_each_safel'
+  - 'card_for_each_dev'
+  - 'cgroup_taskset_for_each'
+  - 'cgroup_taskset_for_each_leader'
+  - 'cpufreq_for_each_entry'
+  - 'cpufreq_for_each_entry_idx'
+  - 'cpufreq_for_each_valid_entry'
+  - 'cpufreq_for_each_valid_entry_idx'
+  - 'css_for_each_child'
+  - 'css_for_each_descendant_post'
+  - 'css_for_each_descendant_pre'
+  - 'device_for_each_child_node'
+  - 'drm_atomic_crtc_for_each_plane'
+  - 'drm_atomic_crtc_state_for_each_plane'
+  - 'drm_atomic_crtc_state_for_each_plane_state'
+  - 'drm_for_each_connector_iter'
+  - 'drm_for_each_crtc'
+  - 'drm_for_each_encoder'
+  - 'drm_for_each_encoder_mask'
+  - 'drm_for_each_fb'
+  - 'drm_for_each_legacy_plane'
+  - 'drm_for_each_plane'
+  - 'drm_for_each_plane_mask'
+  - 'drm_mm_for_each_hole'
+  - 'drm_mm_for_each_node'
+  - 'drm_mm_for_each_node_in_range'
+  - 'drm_mm_for_each_node_safe'
+  - 'for_each_active_drhd_unit'
+  - 'for_each_active_iommu'
+  - 'for_each_available_child_of_node'
+  - 'for_each_bio'
+  - 'for_each_board_func_rsrc'
+  - 'for_each_bvec'
+  - 'for_each_child_of_node'
+  - 'for_each_clear_bit'
+  - 'for_each_clear_bit_from'
+  - 'for_each_cmsghdr'
+  - 'for_each_compatible_node'
+  - 'for_each_console'
+  - 'for_each_cpu'
+  - 'for_each_cpu_and'
+  - 'for_each_cpu_not'
+  - 'for_each_cpu_wrap'
+  - 'for_each_dev_addr'
+  - 'for_each_dma_cap_mask'
+  - 'for_each_drhd_unit'
+  - 'for_each_dss_dev'
+  - 'for_each_efi_memory_desc'
+  - 'for_each_efi_memory_desc_in_map'
+  - 'for_each_endpoint_of_node'
+  - 'for_each_evictable_lru'
+  - 'for_each_fib6_node_rt_rcu'
+  - 'for_each_fib6_walker_rt'
+  - 'for_each_free_mem_range'
+  - 'for_each_free_mem_range_reverse'
+  - 'for_each_func_rsrc'
+  - 'for_each_hstate'
+  - 'for_each_if'
+  - 'for_each_iommu'
+  - 'for_each_ip_tunnel_rcu'
+  - 'for_each_irq_nr'
+  - 'for_each_lru'
+  - 'for_each_matching_node'
+  - 'for_each_matching_node_and_match'
+  - 'for_each_memblock'
+  - 'for_each_memblock_type'
+  - 'for_each_memcg_cache_index'
+  - 'for_each_mem_pfn_range'
+  - 'for_each_mem_range'
+  - 'for_each_mem_range_rev'
+  - 'for_each_migratetype_order'
+  - 'for_each_msi_entry'
+  - 'for_each_net'
+  - 'for_each_netdev'
+  - 'for_each_netdev_continue'
+  - 'for_each_netdev_continue_rcu'
+  - 'for_each_netdev_feature'
+  - 'for_each_netdev_in_bond_rcu'
+  - 'for_each_netdev_rcu'
+  - 'for_each_netdev_reverse'
+  - 'for_each_netdev_safe'
+  - 'for_each_net_rcu'
+  - 'for_each_new_connector_in_state'
+  - 'for_each_new_crtc_in_state'
+  - 'for_each_new_plane_in_state'
+  - 'for_each_new_private_obj_in_state'
+  - 'for_each_node'
+  - 'for_each_node_by_name'
+  - 'for_each_node_by_type'
+  - 'for_each_node_mask'
+  - 'for_each_node_state'
+  - 'for_each_node_with_cpus'
+  - 'for_each_node_with_property'
+  - 'for_each_of_allnodes'
+  - 'for_each_of_allnodes_from'
+  - 'for_each_of_pci_range'
+  - 'for_each_old_connector_in_state'
+  - 'for_each_old_crtc_in_state'
+  - 'for_each_oldnew_connector_in_state'
+  - 'for_each_oldnew_crtc_in_state'
+  - 'for_each_oldnew_plane_in_state'
+  - 'for_each_oldnew_private_obj_in_state'
+  - 'for_each_old_plane_in_state'
+  - 'for_each_old_private_obj_in_state'
+  - 'for_each_online_cpu'
+  - 'for_each_online_node'
+  - 'for_each_online_pgdat'
+  - 'for_each_pci_bridge'
+  - 'for_each_pci_dev'
+  - 'for_each_pci_msi_entry'
+  - 'for_each_populated_zone'
+  - 'for_each_possible_cpu'
+  - 'for_each_present_cpu'
+  - 'for_each_prime_number'
+  - 'for_each_prime_number_from'
+  - 'for_each_process'
+  - 'for_each_process_thread'
+  - 'for_each_property_of_node'
+  - 'for_each_reserved_mem_region'
+  - 'for_each_resv_unavail_range'
+  - 'for_each_rtdcom'
+  - 'for_each_rtdcom_safe'
+  - 'for_each_set_bit'
+  - 'for_each_set_bit_from'
+  - 'for_each_sg'
+  - 'for_each_sg_page'
+  - '__for_each_thread'
+  - 'for_each_thread'
+  - 'for_each_zone'
+  - 'for_each_zone_zonelist'
+  - 'for_each_zone_zonelist_nodemask'
+  - 'fwnode_for_each_available_child_node'
+  - 'fwnode_for_each_child_node'
+  - 'fwnode_graph_for_each_endpoint'
+  - 'gadget_for_each_ep'
+  - 'hash_for_each'
+  - 'hash_for_each_possible'
+  - 'hash_for_each_possible_rcu'
+  - 'hash_for_each_possible_rcu_notrace'
+  - 'hash_for_each_possible_safe'
+  - 'hash_for_each_rcu'
+  - 'hash_for_each_safe'
+  - 'hctx_for_each_ctx'
+  - 'hlist_bl_for_each_entry'
+  - 'hlist_bl_for_each_entry_rcu'
+  - 'hlist_bl_for_each_entry_safe'
+  - 'hlist_for_each'
+  - 'hlist_for_each_entry'
+  - 'hlist_for_each_entry_continue'
+  - 'hlist_for_each_entry_continue_rcu'
+  - 'hlist_for_each_entry_continue_rcu_bh'
+  - 'hlist_for_each_entry_from'
+  - 'hlist_for_each_entry_from_rcu'
+  - 'hlist_for_each_entry_rcu'
+  - 'hlist_for_each_entry_rcu_bh'
+  - 'hlist_for_each_entry_rcu_notrace'
+  - 'hlist_for_each_entry_safe'
+  - '__hlist_for_each_rcu'
+  - 'hlist_for_each_safe'
+  - 'hlist_nulls_for_each_entry'
+  - 'hlist_nulls_for_each_entry_from'
+  - 'hlist_nulls_for_each_entry_rcu'
+  - 'hlist_nulls_for_each_entry_safe'
+  - 'ide_host_for_each_port'
+  - 'ide_port_for_each_dev'
+  - 'ide_port_for_each_present_dev'
+  - 'idr_for_each_entry'
+  - 'idr_for_each_entry_continue'
+  - 'idr_for_each_entry_ul'
+  - 'inet_bind_bucket_for_each'
+  - 'inet_lhash2_for_each_icsk_rcu'
+  - 'iov_for_each'
+  - 'key_for_each'
+  - 'key_for_each_safe'
+  - 'klp_for_each_func'
+  - 'klp_for_each_object'
+  - 'kvm_for_each_memslot'
+  - 'kvm_for_each_vcpu'
+  - 'list_for_each'
+  - 'list_for_each_entry'
+  - 'list_for_each_entry_continue'
+  - 'list_for_each_entry_continue_rcu'
+  - 'list_for_each_entry_continue_reverse'
+  - 'list_for_each_entry_from'
+  - 'list_for_each_entry_from_reverse'
+  - 'list_for_each_entry_lockless'
+  - 'list_for_each_entry_rcu'
+  - 'list_for_each_entry_reverse'
+  - 'list_for_each_entry_safe'
+  - 'list_for_each_entry_safe_continue'
+  - 'list_for_each_entry_safe_from'
+  - 'list_for_each_entry_safe_reverse'
+  - 'list_for_each_prev'
+  - 'list_for_each_prev_safe'
+  - 'list_for_each_safe'
+  - 'llist_for_each'
+  - 'llist_for_each_entry'
+  - 'llist_for_each_entry_safe'
+  - 'llist_for_each_safe'
+  - 'media_device_for_each_entity'
+  - 'media_device_for_each_intf'
+  - 'media_device_for_each_link'
+  - 'media_device_for_each_pad'
+  - 'netdev_for_each_lower_dev'
+  - 'netdev_for_each_lower_private'
+  - 'netdev_for_each_lower_private_rcu'
+  - 'netdev_for_each_mc_addr'
+  - 'netdev_for_each_uc_addr'
+  - 'netdev_for_each_upper_dev_rcu'
+  - 'netdev_hw_addr_list_for_each'
+  - 'nft_rule_for_each_expr'
+  - 'nla_for_each_attr'
+  - 'nla_for_each_nested'
+  - 'nlmsg_for_each_attr'
+  - 'nlmsg_for_each_msg'
+  - 'nr_neigh_for_each'
+  - 'nr_neigh_for_each_safe'
+  - 'nr_node_for_each'
+  - 'nr_node_for_each_safe'
+  - 'of_for_each_phandle'
+  - 'of_property_for_each_string'
+  - 'of_property_for_each_u32'
+  - 'pci_bus_for_each_resource'
+  - 'ping_portaddr_for_each_entry'
+  - 'plist_for_each'
+  - 'plist_for_each_continue'
+  - 'plist_for_each_entry'
+  - 'plist_for_each_entry_continue'
+  - 'plist_for_each_entry_safe'
+  - 'plist_for_each_safe'
+  - 'pnp_for_each_card'
+  - 'pnp_for_each_dev'
+  - 'protocol_for_each_card'
+  - 'protocol_for_each_dev'
+  - 'queue_for_each_hw_ctx'
+  - 'radix_tree_for_each_contig'
+  - 'radix_tree_for_each_slot'
+  - 'radix_tree_for_each_tagged'
+  - 'rbtree_postorder_for_each_entry_safe'
+  - 'resource_list_for_each_entry'
+  - 'resource_list_for_each_entry_safe'
+  - 'rhl_for_each_entry_rcu'
+  - 'rhl_for_each_rcu'
+  - 'rht_for_each'
+  - 'rht_for_each_continue'
+  - 'rht_for_each_entry'
+  - 'rht_for_each_entry_continue'
+  - 'rht_for_each_entry_rcu'
+  - 'rht_for_each_entry_rcu_continue'
+  - 'rht_for_each_entry_safe'
+  - 'rht_for_each_rcu'
+  - 'rht_for_each_rcu_continue'
+  - '__rq_for_each_bio'
+  - 'rq_for_each_segment'
+  - 'scsi_for_each_prot_sg'
+  - 'scsi_for_each_sg'
+  - 'sctp_for_each_hentry'
+  - 'sctp_skb_for_each'
+  - 'shdma_for_each_chan'
+  - '__shost_for_each_device'
+  - 'shost_for_each_device'
+  - 'sk_for_each'
+  - 'sk_for_each_bound'
+  - 'sk_for_each_entry_offset_rcu'
+  - 'sk_for_each_from'
+  - 'sk_for_each_rcu'
+  - 'sk_for_each_safe'
+  - 'sk_nulls_for_each'
+  - 'sk_nulls_for_each_from'
+  - 'sk_nulls_for_each_rcu'
+  - 'snd_pcm_group_for_each_entry'
+  - 'snd_soc_dapm_widget_for_each_path'
+  - 'snd_soc_dapm_widget_for_each_path_safe'
+  - 'snd_soc_dapm_widget_for_each_sink_path'
+  - 'snd_soc_dapm_widget_for_each_source_path'
+  - 'tb_property_for_each'
+  - 'udp_portaddr_for_each_entry'
+  - 'udp_portaddr_for_each_entry_rcu'
+  - 'usb_hub_for_each_child'
+  - 'v4l2_device_for_each_subdev'
+  - 'v4l2_m2m_for_each_dst_buf'
+  - 'v4l2_m2m_for_each_dst_buf_safe'
+  - 'v4l2_m2m_for_each_src_buf'
+  - 'v4l2_m2m_for_each_src_buf_safe'
+  - 'zorro_for_each_dev'
+
+#IncludeBlocks: Preserve # Unknown to clang-format-5.0
+IncludeCategories:
+  - Regex: '.*'
+    Priority: 1
+IncludeIsMainRegex: '(Test)?$'
+IndentCaseLabels: false
+#IndentPPDirectives: None # Unknown to clang-format-5.0
+IndentWidth: 8
+IndentWrappedFunctionNames: true
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: Inner
+#ObjCBinPackProtocolList: Auto # Unknown to clang-format-5.0
+ObjCBlockIndentWidth: 8
+ObjCSpaceAfterProperty: true
+ObjCSpaceBeforeProtocolList: true
+
+# Taken from git's rules
+#PenaltyBreakAssignment: 10 # Unknown to clang-format-4.0
+PenaltyBreakBeforeFirstCallParameter: 30
+PenaltyBreakComment: 10
+PenaltyBreakFirstLessLess: 0
+PenaltyBreakString: 10
+PenaltyExcessCharacter: 100
+PenaltyReturnTypeOnItsOwnLine: 60
+
+PointerAlignment: Right
+ReflowComments: false
+SortIncludes: false
+#SortUsingDeclarations: false # Unknown to clang-format-4.0
+SpaceAfterCStyleCast: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+#SpaceBeforeCtorInitializerColon: true # Unknown to clang-format-5.0
+#SpaceBeforeInheritanceColon: true # Unknown to clang-format-5.0
+SpaceBeforeParens: ControlStatements
+#SpaceBeforeRangeBasedForLoopColon: true # Unknown to clang-format-5.0
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInContainerLiterals: false
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp03
+TabWidth: 8
+UseTab: Always
+...
index 85bcc269644259589f93e516e25c28910346b8f5..a1dfd2acd9c36dd716bbbc4cac2855b04edc082d 100644 (file)
@@ -81,6 +81,7 @@ modules.builtin
 !.gitignore
 !.mailmap
 !.cocciconfig
+!.clang-format
 
 #
 # Generated include files
index cf60412882f0ded5c3cdc577f18cb36b05b1866b..95984289a4ee69fd9990e3d2d05a0d59e8785ac9 100644 (file)
@@ -43,6 +43,14 @@ Contact:     linux-rtc@vger.kernel.org
 Description:
                (RO) The name of the RTC corresponding to this sysfs directory
 
+What:          /sys/class/rtc/rtcX/range
+Date:          January 2018
+KernelVersion: 4.16
+Contact:       linux-rtc@vger.kernel.org
+Description:
+               Valid time range for the RTC, as seconds from epoch, formatted
+               as [min, max]
+
 What:          /sys/class/rtc/rtcX/since_epoch
 Date:          March 2006
 KernelVersion: 2.6.17
@@ -57,14 +65,6 @@ Contact:     linux-rtc@vger.kernel.org
 Description:
                (RO) RTC-provided time in 24-hour notation (hh:mm:ss)
 
-What:          /sys/class/rtc/rtcX/*/nvmem
-Date:          February 2016
-KernelVersion: 4.6
-Contact:       linux-rtc@vger.kernel.org
-Description:
-               (RW) The non volatile storage exported as a raw file, as
-               described in Documentation/nvmem/nvmem.txt
-
 What:          /sys/class/rtc/rtcX/offset
 Date:          February 2016
 KernelVersion: 4.6
index a4af2e124e246fefd1aafd21c22cd20c43f1248f..3682e99234c2c6652ac4990504dfb14bd3873618 100644 (file)
@@ -262,7 +262,7 @@ When oom event notifier is registered, event will be delivered.
 2.6 Locking
 
    lock_page_cgroup()/unlock_page_cgroup() should not be called under
-   mapping->tree_lock.
+   the i_pages lock.
 
    Other lock order is following:
    PG_locked.
index 978463a7c81ea59ea24f2e967f9683321ef1188e..073f128af5a741c531eddcf77c65d2f76af71f6c 100644 (file)
@@ -97,12 +97,10 @@ flags       - flags of the cpufreq driver
 ==================================================================
 For details about OPP, see Documentation/power/opp.txt
 
-dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with
-       cpufreq_table_validate_and_show() which is provided with the list of
-       frequencies that are available for operation. This function provides
-       a ready to use conversion routine to translate the OPP layer's internal
-       information about the available frequencies into a format readily
-       providable to cpufreq.
+dev_pm_opp_init_cpufreq_table -
+       This function provides a ready to use conversion routine to translate
+       the OPP layer's internal information about the available frequencies
+       into a format readily providable to cpufreq.
 
        WARNING: Do not use this function in interrupt context.
 
@@ -112,7 +110,7 @@ dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with
                /* Do things */
                r = dev_pm_opp_init_cpufreq_table(dev, &freq_table);
                if (!r)
-                       cpufreq_table_validate_and_show(policy, freq_table);
+                       policy->freq_table = freq_table;
                /* Do other things */
         }
 
index 61546ac578d6079a56aecc643989647ed7261399..6e353d00cdc687eac454c4c586f7075cb73399eb 100644 (file)
@@ -259,10 +259,8 @@ CPUFREQ_ENTRY_INVALID. The entries don't need to be in sorted in any
 particular order, but if they are cpufreq core will do DVFS a bit
 quickly for them as search for best match is faster.
 
-By calling cpufreq_table_validate_and_show(), the cpuinfo.min_freq and
-cpuinfo.max_freq values are detected, and policy->min and policy->max
-are set to the same values. This is helpful for the per-CPU
-initialization stage.
+The cpufreq table is verified automatically by the core if the policy contains a
+valid pointer in its policy->freq_table field.
 
 cpufreq_frequency_table_verify() assures that at least one valid
 frequency is within policy->min and policy->max, and all other criteria
index b6f44f490ed7839f0963acfc0c53ed2537fa35e2..d1587f434e7bb6de8125a4601cee1bdcb24cae50 100644 (file)
@@ -40,6 +40,7 @@ total 0
 -r--r--r-- 1 root root 4096 Feb  8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 name
 -r--r--r-- 1 root root 4096 Feb  8 10:42 power
+-r--r--r-- 1 root root 4096 Feb  8 10:42 residency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 time
 -r--r--r-- 1 root root 4096 Feb  8 10:42 usage
 
@@ -50,6 +51,7 @@ total 0
 -r--r--r-- 1 root root 4096 Feb  8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 name
 -r--r--r-- 1 root root 4096 Feb  8 10:42 power
+-r--r--r-- 1 root root 4096 Feb  8 10:42 residency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 time
 -r--r--r-- 1 root root 4096 Feb  8 10:42 usage
 
@@ -60,6 +62,7 @@ total 0
 -r--r--r-- 1 root root 4096 Feb  8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 name
 -r--r--r-- 1 root root 4096 Feb  8 10:42 power
+-r--r--r-- 1 root root 4096 Feb  8 10:42 residency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 time
 -r--r--r-- 1 root root 4096 Feb  8 10:42 usage
 
@@ -70,6 +73,7 @@ total 0
 -r--r--r-- 1 root root 4096 Feb  8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 name
 -r--r--r-- 1 root root 4096 Feb  8 10:42 power
+-r--r--r-- 1 root root 4096 Feb  8 10:42 residency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 time
 -r--r--r-- 1 root root 4096 Feb  8 10:42 usage
 --------------------------------------------------------------------------------
@@ -78,6 +82,8 @@ total 0
 * desc : Small description about the idle state (string)
 * disable : Option to disable this idle state (bool) -> see note below
 * latency : Latency to exit out of this idle state (in microseconds)
+* residency : Time after which a state becomes more efficient than any
+  shallower state (in microseconds)
 * name : Name of the idle state (string)
 * power : Power consumed while in this idle state (in milliwatts)
 * time : Total time spent in this idle state (in microseconds)
diff --git a/Documentation/devicetree/bindings/dma/mtk-hsdma.txt b/Documentation/devicetree/bindings/dma/mtk-hsdma.txt
new file mode 100644 (file)
index 0000000..4bb3173
--- /dev/null
@@ -0,0 +1,33 @@
+MediaTek High-Speed DMA Controller
+==================================
+
+This device follows the generic DMA bindings defined in dma/dma.txt.
+
+Required properties:
+
+- compatible:  Must be one of
+                 "mediatek,mt7622-hsdma": for MT7622 SoC
+                 "mediatek,mt7623-hsdma": for MT7623 SoC
+- reg:         Should contain the register's base address and length.
+- interrupts:  Should contain a reference to the interrupt used by this
+               device.
+- clocks:      Should be the clock specifiers corresponding to the entry in
+               clock-names property.
+- clock-names: Should contain "hsdma" entries.
+- power-domains: Phandle to the power domain that the device is part of
+- #dma-cells:  The length of the DMA specifier, must be <1>. This one cell
+               in dmas property of a client device represents the channel
+               number.
+Example:
+
+        hsdma: dma-controller@1b007000 {
+               compatible = "mediatek,mt7623-hsdma";
+               reg = <0 0x1b007000 0 0x1000>;
+               interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_LOW>;
+               clocks = <&ethsys CLK_ETHSYS_HSDMA>;
+               clock-names = "hsdma";
+               power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>;
+               #dma-cells = <1>;
+       };
+
+DMA clients must use the format described in dma/dma.txt file.
index 9cbf5d9df8fd00d8e2101fd1d3fc22933410a244..cf5b9e44432c62b3d2b451e142dcc67db2ac11ec 100644 (file)
@@ -15,6 +15,10 @@ Required properties:
   the secure world.
 - qcom,controlled-remotely : optional, indicates that the bam is controlled by
   remote processor i.e. execution environment.
+- num-channels : optional, indicates supported number of DMA channels in a
+  remotely controlled bam.
+- qcom,num-ees : optional, indicates supported number of Execution Environments
+  in a remotely controlled bam.
 
 Example:
 
index 891db41e94201ce69944133854eaedc97b9e672a..aadfb236d53abdd12fc69cf36cd1940303441deb 100644 (file)
@@ -18,6 +18,7 @@ Required Properties:
              Examples with soctypes are:
                - "renesas,dmac-r8a7743" (RZ/G1M)
                - "renesas,dmac-r8a7745" (RZ/G1E)
+               - "renesas,dmac-r8a77470" (RZ/G1C)
                - "renesas,dmac-r8a7790" (R-Car H2)
                - "renesas,dmac-r8a7791" (R-Car M2-W)
                - "renesas,dmac-r8a7792" (R-Car V2H)
@@ -26,6 +27,7 @@ Required Properties:
                - "renesas,dmac-r8a7795" (R-Car H3)
                - "renesas,dmac-r8a7796" (R-Car M3-W)
                - "renesas,dmac-r8a77970" (R-Car V3M)
+               - "renesas,dmac-r8a77980" (R-Car V3H)
 
 - reg: base address and length of the registers block for the DMAC
 
index f3d1f151ba80a45e40e0c1d97535ad8e20af296e..9dc935e24e558f4f6edaa166a29fbd038bc66d48 100644 (file)
@@ -11,6 +11,7 @@ Required Properties:
          - "renesas,r8a7794-usb-dmac" (R-Car E2)
          - "renesas,r8a7795-usb-dmac" (R-Car H3)
          - "renesas,r8a7796-usb-dmac" (R-Car M3-W)
+         - "renesas,r8a77965-usb-dmac" (R-Car M3-N)
 - reg: base address and length of the registers block for the DMAC
 - interrupts: interrupt specifiers for the DMAC, one for each entry in
   interrupt-names.
diff --git a/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt b/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt
new file mode 100644 (file)
index 0000000..f237b79
--- /dev/null
@@ -0,0 +1,41 @@
+Synopsys DesignWare AXI DMA Controller
+
+Required properties:
+- compatible: "snps,axi-dma-1.01a"
+- reg: Address range of the DMAC registers. This should include
+  all of the per-channel registers.
+- interrupt: Should contain the DMAC interrupt number.
+- interrupt-parent: Should be the phandle for the interrupt controller
+  that services interrupts for this device.
+- dma-channels: Number of channels supported by hardware.
+- snps,dma-masters: Number of AXI masters supported by the hardware.
+- snps,data-width: Maximum AXI data width supported by hardware.
+  (0 - 8bits, 1 - 16bits, 2 - 32bits, ..., 6 - 512bits)
+- snps,priority: Priority of channel. Array size is equal to the number of
+  dma-channels. Priority value must be programmed within [0:dma-channels-1]
+  range. (0 - minimum priority)
+- snps,block-size: Maximum block size supported by the controller channel.
+  Array size is equal to the number of dma-channels.
+
+Optional properties:
+- snps,axi-max-burst-len: Restrict master AXI burst length by value specified
+  in this property. If this property is missing the maximum AXI burst length
+  supported by DMAC is used. [1:256]
+
+Example:
+
+dmac: dma-controller@80000 {
+       compatible = "snps,axi-dma-1.01a";
+       reg = <0x80000 0x400>;
+       clocks = <&core_clk>, <&cfgr_clk>;
+       clock-names = "core-clk", "cfgr-clk";
+       interrupt-parent = <&intc>;
+       interrupts = <27>;
+
+       dma-channels = <4>;
+       snps,dma-masters = <2>;
+       snps,data-width = <3>;
+       snps,block-size = <4096 4096 4096 4096>;
+       snps,priority = <0 1 2 3>;
+       snps,axi-max-burst-len = <16>;
+};
index 0b55718bf88993a83eb5091128cc6761b93e5852..c5f519097204f847fee551879c67a9260f71103e 100644 (file)
@@ -62,14 +62,14 @@ channel: a phandle to the DMA controller plus the following four integer cells:
        0x1: medium
        0x2: high
        0x3: very high
-4. A 32bit mask specifying the DMA FIFO threshold configuration which are device
-   dependent:
- -bit 0-1: Fifo threshold
+4. A 32bit bitfield value specifying DMA features which are device dependent:
+ -bit 0-1: DMA FIFO threshold selection
        0x0: 1/4 full FIFO
        0x1: 1/2 full FIFO
        0x2: 3/4 full FIFO
        0x3: full FIFO
 
+
 Example:
 
        usart1: serial@40011000 {
index 1fd5d69647ca0dcc01e3cdd724dd4627e0d60e49..ffadb7c6f1f3a8329503f9c9bcc34aceffdf12b7 100644 (file)
@@ -11,6 +11,8 @@ Required Properties:
     the device is compatible with the R-Car Gen2 VMSA-compatible IPMMU.
 
     - "renesas,ipmmu-r8a73a4" for the R8A73A4 (R-Mobile APE6) IPMMU.
+    - "renesas,ipmmu-r8a7743" for the R8A7743 (RZ/G1M) IPMMU.
+    - "renesas,ipmmu-r8a7745" for the R8A7745 (RZ/G1E) IPMMU.
     - "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU.
     - "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU.
     - "renesas,ipmmu-r8a7793" for the R8A7793 (R-Car M2-N) IPMMU.
@@ -19,7 +21,8 @@ Required Properties:
     - "renesas,ipmmu-r8a7796" for the R8A7796 (R-Car M3-W) IPMMU.
     - "renesas,ipmmu-r8a77970" for the R8A77970 (R-Car V3M) IPMMU.
     - "renesas,ipmmu-r8a77995" for the R8A77995 (R-Car D3) IPMMU.
-    - "renesas,ipmmu-vmsa" for generic R-Car Gen2 VMSA-compatible IPMMU.
+    - "renesas,ipmmu-vmsa" for generic R-Car Gen2 or RZ/G1 VMSA-compatible
+                          IPMMU.
 
   - reg: Base address and size of the IPMMU registers.
   - interrupts: Specifiers for the MMU fault interrupts. For instances that
index 2098f7732264e41fddb8e58c22a309f2693715e7..6ecefea1c6f9b4647140e57fceb9134608b43acb 100644 (file)
@@ -14,6 +14,11 @@ Required properties:
                     "single-master" device, and needs no additional information
                     to associate with its master device.  See:
                     Documentation/devicetree/bindings/iommu/iommu.txt
+- clocks          : A list of clocks required for the IOMMU to be accessible by
+                    the host CPU.
+- clock-names     : Should contain the following:
+       "iface" - Main peripheral bus clock (PCLK/HCL) (required)
+       "aclk"  - AXI bus clock (required)
 
 Optional properties:
 - rockchip,disable-mmu-reset : Don't use the mmu reset operation.
@@ -27,5 +32,7 @@ Example:
                reg = <0xff940300 0x100>;
                interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-names = "vopl_mmu";
+               clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>;
+               clock-names = "aclk", "iface";
                #iommu-cells = <0>;
        };
diff --git a/Documentation/devicetree/bindings/mips/mscc.txt b/Documentation/devicetree/bindings/mips/mscc.txt
new file mode 100644 (file)
index 0000000..ae15ec3
--- /dev/null
@@ -0,0 +1,43 @@
+* Microsemi MIPS CPUs
+
+Boards with a SoC of the Microsemi MIPS family shall have the following
+properties:
+
+Required properties:
+- compatible: "mscc,ocelot"
+
+
+* Other peripherals:
+
+o CPU chip regs:
+
+The SoC has a few registers (DEVCPU_GCB:CHIP_REGS) handling miscellaneous
+functionalities: chip ID, general purpose register for software use, reset
+controller, hardware status and configuration, efuses.
+
+Required properties:
+- compatible: Should be "mscc,ocelot-chip-regs", "simple-mfd", "syscon"
+- reg : Should contain registers location and length
+
+Example:
+       syscon@71070000 {
+               compatible = "mscc,ocelot-chip-regs", "simple-mfd", "syscon";
+               reg = <0x71070000 0x1c>;
+       };
+
+
+o CPU system control:
+
+The SoC has a few registers (ICPU_CFG:CPU_SYSTEM_CTRL) handling configuration of
+the CPU: 8 general purpose registers, reset control, CPU en/disabling, CPU
+endianness, CPU bus control, CPU status.
+
+Required properties:
+- compatible: Should be "mscc,ocelot-cpu-syscon", "syscon"
+- reg : Should contain registers location and length
+
+Example:
+       syscon@70000000 {
+               compatible = "mscc,ocelot-cpu-syscon", "syscon";
+               reg = <0x70000000 0x2c>;
+       };
index 594982c6b9f9e2846163afa654f208ed74e888e1..79bf352e659cd1b999603317a62a7ec62fbac0b6 100644 (file)
@@ -6,7 +6,11 @@ the definition of the PHY node in booting-without-of.txt for an example
 of how to define a PHY.
 
 Required properties:
-  - reg : Offset and length of the register set for the device
+  - reg : Offset and length of the register set for the device, and optionally
+          the offset and length of the TBIPA register (TBI PHY address
+         register).  If TBIPA register is not specified, the driver will
+         attempt to infer it from the register set specified (your mileage may
+         vary).
   - compatible : Should define the compatible device type for the
     mdio. Currently supported strings/devices are:
        - "fsl,gianfar-tbi"
diff --git a/Documentation/devicetree/bindings/pmem/pmem-region.txt b/Documentation/devicetree/bindings/pmem/pmem-region.txt
new file mode 100644 (file)
index 0000000..5cfa4f0
--- /dev/null
@@ -0,0 +1,65 @@
+Device-tree bindings for persistent memory regions
+-----------------------------------------------------
+
+Persistent memory refers to a class of memory devices that are:
+
+       a) Usable as main system memory (i.e. cacheable), and
+       b) Retain their contents across power failure.
+
+Given b) it is best to think of persistent memory as a kind of memory mapped
+storage device. To ensure data integrity the operating system needs to manage
+persistent regions separately to the normal memory pool. To aid with that this
+binding provides a standardised interface for discovering where persistent
+memory regions exist inside the physical address space.
+
+Bindings for the region nodes:
+-----------------------------
+
+Required properties:
+       - compatible = "pmem-region"
+
+       - reg = <base, size>;
+               The reg property should specify an address range that is
+               translatable to a system physical address range. This address
+               range should be mappable as normal system memory would be
+               (i.e cacheable).
+
+               If the reg property contains multiple address ranges
+               each address range will be treated as though it was specified
+               in a separate device node. Having multiple address ranges in a
+               node implies no special relationship between the two ranges.
+
+Optional properties:
+       - Any relevant NUMA associativity properties for the target platform.
+
+       - volatile; This property indicates that this region is actually
+         backed by non-persistent memory. This lets the OS know that it
+         may skip the cache flushes required to ensure data is made
+         persistent after a write.
+
+         If this property is absent then the OS must assume that the region
+         is backed by non-volatile memory.
+
+Examples:
+--------------------
+
+       /*
+        * This node specifies one 4KB region spanning from
+        * 0x5000 to 0x5fff that is backed by non-volatile memory.
+        */
+       pmem@5000 {
+               compatible = "pmem-region";
+               reg = <0x00005000 0x00001000>;
+       };
+
+       /*
+        * This node specifies two 4KB regions that are backed by
+        * volatile (normal) memory.
+        */
+       pmem@6000 {
+               compatible = "pmem-region";
+               reg = < 0x00006000 0x00001000
+                       0x00008000 0x00001000 >;
+               volatile;
+       };
+
diff --git a/Documentation/devicetree/bindings/rtc/isil,isl12026.txt b/Documentation/devicetree/bindings/rtc/isil,isl12026.txt
new file mode 100644 (file)
index 0000000..2e0be45
--- /dev/null
@@ -0,0 +1,28 @@
+ISL12026 I2C RTC/EEPROM
+
+ISL12026 is an I2C RTC/EEPROM combination device.  The RTC and control
+registers respond at bus address 0x6f, and the EEPROM array responds
+at bus address 0x57.  The canonical "reg" value will be for the RTC portion.
+
+Required properties supported by the device:
+
+ - "compatible": must be "isil,isl12026"
+ - "reg": I2C bus address of the device (always 0x6f)
+
+Optional properties:
+
+ - "isil,pwr-bsw": If present PWR.BSW bit must be set to the specified
+                   value for proper operation.
+
+ - "isil,pwr-sbib": If present PWR.SBIB bit must be set to the specified
+                    value for proper operation.
+
+
+Example:
+
+       rtc@6f {
+               compatible = "isil,isl12026";
+               reg = <0x6f>;
+               isil,pwr-bsw = <0>;
+               isil,pwr-sbib = <1>;
+       };
index 12e8b3e576b078dba7e46fb5671f3fca3c5bbff3..b5f978a4cac67471a7398d24a388fd04b88702ff 100644 (file)
@@ -225,6 +225,7 @@ motorola    Motorola, Inc.
 moxa   Moxa Inc.
 mpl    MPL AG
 mqmaker        mqmaker Inc.
+mscc   Microsemi Corporation
 msi    Micro-Star International Co. Ltd.
 mti    Imagination Technologies Ltd. (formerly MIPS Technologies Inc.)
 multi-inno     Multi-Inno Technology Co.,Ltd
index 0b302a11718a43fd7ed44725390a5a4ceacb2229..d7f011ddc1500cdf8e705430c90ed60deb7a2ecc 100644 (file)
@@ -62,6 +62,18 @@ subdirectories, and a summation of all nested file sizes.  This makes
 the identification of large disk space consumers relatively quick, as
 no 'du' or similar recursive scan of the file system is required.
 
+Finally, Ceph also allows quotas to be set on any directory in the system.
+The quota can restrict the number of bytes or the number of files stored
+beneath that point in the directory hierarchy.  Quotas can be set using
+extended attributes 'ceph.quota.max_files' and 'ceph.quota.max_bytes', eg:
+
+ setfattr -n ceph.quota.max_bytes -v 100000000 /some/dir
+ getfattr -n ceph.quota.max_bytes /some/dir
+
+A limitation of the current quotas implementation is that it relies on the
+cooperation of the client mounting the file system to stop writers when a
+limit is reached.  A modified or adversarial client cannot be prevented
+from writing as much data as it needs.
 
 Mount Syntax
 ============
@@ -137,6 +149,10 @@ Mount Options
   noasyncreaddir
        Do not use the dcache as above for readdir.
 
+  noquotadf
+        Report overall filesystem usage in statfs instead of using the root
+        directory quota.
+
 More Information
 ================
 
index 791bc0bd91e6b25d59d526742ef2b12c07a3a794..39033538eb030c80a503e164391fe54e674f197b 100644 (file)
@@ -6,6 +6,10 @@ Supported chips:
     Prefix: 'adm1075'
     Addresses scanned: -
     Datasheet: www.analog.com/static/imported-files/data_sheets/ADM1075.pdf
+  * Analog Devices ADM1272
+    Prefix: 'adm1272'
+    Addresses scanned: -
+    Datasheet: www.analog.com/static/imported-files/data_sheets/ADM1272.pdf
   * Analog Devices ADM1275
     Prefix: 'adm1275'
     Addresses scanned: -
@@ -29,11 +33,11 @@ Author: Guenter Roeck <linux@roeck-us.net>
 Description
 -----------
 
-This driver supports hardware monitoring for Analog Devices ADM1075, ADM1275,
-ADM1276, ADM1278, ADM1293, and ADM1294 Hot-Swap Controller and Digital
-Power Monitors.
+This driver supports hardware monitoring for Analog Devices ADM1075, ADM1272,
+ADM1275, ADM1276, ADM1278, ADM1293, and ADM1294 Hot-Swap Controller and
+Digital Power Monitors.
 
-ADM1075, ADM1275, ADM1276, ADM1278, ADM1293, and ADM1294 are hot-swap
+ADM1075, ADM1272, ADM1275, ADM1276, ADM1278, ADM1293, and ADM1294 are hot-swap
 controllers that allow a circuit board to be removed from or inserted into
 a live backplane. They also feature current and voltage readback via an
 integrated 12 bit analog-to-digital converter (ADC), accessed using a
@@ -100,11 +104,10 @@ power1_input_lowest       Lowest observed input power. ADM1293 and ADM1294 only.
 power1_input_highest   Highest observed input power.
 power1_reset_history   Write any value to reset history.
 
-                       Power attributes are supported on ADM1075, ADM1276,
-                       ADM1293, and ADM1294.
+                       Power attributes are supported on ADM1075, ADM1272,
+                       ADM1276, ADM1293, and ADM1294.
 
 temp1_input            Chip temperature.
-                       Temperature attributes are only available on ADM1278.
 temp1_max              Maximum chip temperature.
 temp1_max_alarm                Temperature alarm.
 temp1_crit             Critical chip temperature.
@@ -112,4 +115,5 @@ temp1_crit_alarm    Critical temperature high alarm.
 temp1_highest          Highest observed temperature.
 temp1_reset_history    Write any value to reset history.
 
-                       Temperature attributes are supported on ADM1278.
+                       Temperature attributes are supported on ADM1272 and
+                       ADM1278.
index 22f68ad032cf983c200a36975691442f53f2b8d9..cfa99a353b8cb591a25a1a7601de83de2c6f2d1f 100644 (file)
@@ -11,10 +11,8 @@ Supported chips:
     Addresses scanned: none, force parameter needed
     Datasheet: http://www.national.com/pf/LM/LM76.html
   * Maxim MAX6633/MAX6634/MAX6635
-    Prefix: 'lm92'
-    Addresses scanned: I2C 0x48 - 0x4b
-    MAX6633 with address in 0x40 - 0x47, 0x4c - 0x4f needs force parameter
-    and MAX6634 with address in 0x4c - 0x4f needs force parameter
+    Prefix: 'max6635'
+    Addresses scanned: none, force parameter needed
     Datasheet: http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3074
 
 Authors:
index 76add4c9cd6893f4aa6aeaae427bf6becec6b4a3..bd59834d310ff7c73c424a7bccb59f6d8d7a7c7c 100644 (file)
@@ -36,6 +36,14 @@ Supported chips:
     Prefix: 'nct6793'
     Addresses scanned: ISA address retrieved from Super I/O registers
     Datasheet: Available from Nuvoton upon request
+  * Nuvoton NCT6795D
+    Prefix: 'nct6795'
+    Addresses scanned: ISA address retrieved from Super I/O registers
+    Datasheet: Available from Nuvoton upon request
+  * Nuvoton NCT6796D
+    Prefix: 'nct6796'
+    Addresses scanned: ISA address retrieved from Super I/O registers
+    Datasheet: Available from Nuvoton upon request
 
 Authors:
         Guenter Roeck <linux@roeck-us.net>
@@ -88,10 +96,10 @@ The mode works for fan1-fan5.
 sysfs attributes
 ----------------
 
-pwm[1-5] - this file stores PWM duty cycle or DC value (fan speed) in range:
+pwm[1-7] - this file stores PWM duty cycle or DC value (fan speed) in range:
           0 (lowest speed) to 255 (full)
 
-pwm[1-5]_enable - this file controls mode of fan/temperature control:
+pwm[1-7]_enable - this file controls mode of fan/temperature control:
        * 0 Fan control disabled (fans set to maximum speed)
        * 1 Manual mode, write to pwm[0-5] any value 0-255
        * 2 "Thermal Cruise" mode
@@ -99,16 +107,16 @@ pwm[1-5]_enable - this file controls mode of fan/temperature control:
        * 4 "Smart Fan III" mode (NCT6775F only)
        * 5 "Smart Fan IV" mode
 
-pwm[1-5]_mode - controls if output is PWM or DC level
+pwm[1-7]_mode - controls if output is PWM or DC level
         * 0 DC output
         * 1 PWM output
 
 Common fan control attributes
 -----------------------------
 
-pwm[1-5]_temp_sel      Temperature source. Value is temperature sensor index.
+pwm[1-7]_temp_sel      Temperature source. Value is temperature sensor index.
                        For example, select '1' for temp1_input.
-pwm[1-5]_weight_temp_sel
+pwm[1-7]_weight_temp_sel
                        Secondary temperature source. Value is temperature
                        sensor index. For example, select '1' for temp1_input.
                        Set to 0 to disable secondary temperature control.
@@ -116,16 +124,16 @@ pwm[1-5]_weight_temp_sel
 If secondary temperature functionality is enabled, it is controlled with the
 following attributes.
 
-pwm[1-5]_weight_duty_step
+pwm[1-7]_weight_duty_step
                        Duty step size.
-pwm[1-5]_weight_temp_step
+pwm[1-7]_weight_temp_step
                        Temperature step size. With each step over
                        temp_step_base, the value of weight_duty_step is added
                        to the current pwm value.
-pwm[1-5]_weight_temp_step_base
+pwm[1-7]_weight_temp_step_base
                        Temperature at which secondary temperature control kicks
                        in.
-pwm[1-5]_weight_temp_step_tol
+pwm[1-7]_weight_temp_step_tol
                        Temperature step tolerance.
 
 Thermal Cruise mode (2)
@@ -133,9 +141,9 @@ Thermal Cruise mode (2)
 
 If the temperature is in the range defined by:
 
-pwm[1-5]_target_temp   Target temperature, unit millidegree Celsius
+pwm[1-7]_target_temp   Target temperature, unit millidegree Celsius
                        (range 0 - 127000)
-pwm[1-5]_temp_tolerance
+pwm[1-7]_temp_tolerance
                        Target temperature tolerance, unit millidegree Celsius
 
 there are no changes to fan speed. Once the temperature leaves the interval, fan
 speed increases (if temperature is higher than desired) or decreases (if
 temperature is lower than desired), using the following limits and time
 intervals.
 
-pwm[1-5]_start         fan pwm start value (range 1 - 255), to start fan
+pwm[1-7]_start         fan pwm start value (range 1 - 255), to start fan
                        when the temperature is above defined range.
-pwm[1-5]_floor         lowest fan pwm (range 0 - 255) if temperature is below
+pwm[1-7]_floor         lowest fan pwm (range 0 - 255) if temperature is below
                        the defined range. If set to 0, the fan is expected to
                        stop if the temperature is below the defined range.
-pwm[1-5]_step_up_time  milliseconds before fan speed is increased
-pwm[1-5]_step_down_time        milliseconds before fan speed is decreased
-pwm[1-5]_stop_time     how many milliseconds must elapse to switch
+pwm[1-7]_step_up_time  milliseconds before fan speed is increased
+pwm[1-7]_step_down_time        milliseconds before fan speed is decreased
+pwm[1-7]_stop_time     how many milliseconds must elapse to switch
                        corresponding fan off (when the temperature was below
                        defined range).
 
@@ -159,8 +167,8 @@ Speed Cruise mode (3)
 
 This mode tries to keep the fan speed constant.
 
-fan[1-5]_target                Target fan speed
-fan[1-5]_tolerance
+fan[1-7]_target                Target fan speed
+fan[1-7]_tolerance
                        Target speed tolerance
 
 
@@ -177,19 +185,19 @@ points should be set to higher temperatures and higher pwm values to achieve
 higher fan speeds with increasing temperature. The last data point reflects
 critical temperature mode, in which the fans should run at full speed.
 
-pwm[1-5]_auto_point[1-7]_pwm
+pwm[1-7]_auto_point[1-7]_pwm
                        pwm value to be set if temperature reaches matching
                        temperature range.
-pwm[1-5]_auto_point[1-7]_temp
+pwm[1-7]_auto_point[1-7]_temp
                        Temperature over which the matching pwm is enabled.
-pwm[1-5]_temp_tolerance
+pwm[1-7]_temp_tolerance
                        Temperature tolerance, unit millidegree Celsius
-pwm[1-5]_crit_temp_tolerance
+pwm[1-7]_crit_temp_tolerance
                        Temperature tolerance for critical temperature,
                        unit millidegree Celsius
 
-pwm[1-5]_step_up_time  milliseconds before fan speed is increased
-pwm[1-5]_step_down_time        milliseconds before fan speed is decreased
+pwm[1-7]_step_up_time  milliseconds before fan speed is increased
+pwm[1-7]_step_down_time        milliseconds before fan speed is decreased
 
 Usage Notes
 -----------
index 47f4765db256c9e61e36aa2cffb196780995316f..8b3cdda541c1cc67a59e8fbddd31deb56ca961a6 100644 (file)
@@ -6,13 +6,13 @@ Supported chips:
     Prefix: 'sht21'
     Addresses scanned: none
     Datasheet: Publicly available at the Sensirion website
-    http://www.sensirion.com/en/pdf/product_information/Datasheet-humidity-sensor-SHT21.pdf
+    http://www.sensirion.com/file/datasheet_sht21
 
   * Sensirion SHT25
-    Prefix: 'sht21'
+    Prefix: 'sht25'
     Addresses scanned: none
     Datasheet: Publicly available at the Sensirion website
-    http://www.sensirion.com/en/pdf/product_information/Datasheet-humidity-sensor-SHT25.pdf
+    http://www.sensirion.com/file/datasheet_sht25
 
 Author:
   Urs Fleisch <urs.fleisch@sensirion.com>
index b0d88184f48ee16905b910dc4c3ee2f9086c3423..d9daa6ab1e8eeec62c3a08af240e11057802cbc1 100644 (file)
@@ -5,7 +5,7 @@ Supported chips:
   * Sensirion SHT3x-DIS
     Prefix: 'sht3x'
     Addresses scanned: none
-    Datasheet: http://www.sensirion.com/fileadmin/user_upload/customers/sensirion/Dokumente/Humidity/Sensirion_Humidity_Datasheet_SHT3x_DIS.pdf
+    Datasheet: https://www.sensirion.com/file/datasheet_sht3x_digital
 
 Author:
   David Frey <david.frey@sensirion.com>
index 7bb0505b60f1c2c5ac76afb7cd6b5067b54052eb..eb03ccc41c41c931036966bd8d73575ccfe4d308 100644 (file)
@@ -31,7 +31,7 @@ of the video device exits.
 The default :c:func:`video_device_release` callback currently
 just calls ``kfree`` to free the allocated memory.
 
-There is also a ::c:func:`video_device_release_empty` function that does
+There is also a :c:func:`video_device_release_empty` function that does
 nothing (is empty) and should be used if the struct is embedded and there
 is nothing to do when it is released.
 
index 45e76e5bc1ea864b9c224c3e6749cd50a205e996..582fda48881034db6d8d6d1d9a65933b27f1aabb 100644 (file)
@@ -89,7 +89,7 @@ id's until they get an error.
 
        -
        -
-       -  Entity type, see :ref:`media-entity-type` for details.
+       -  Entity type, see :ref:`media-entity-functions` for details.
 
     -  .. row 4
 
index c8f9ea37db2dd5c22bf7dce171f945a3dd91694e..c4055ddf070a1ac66415c1d67e0d2e3faa86a18a 100644 (file)
@@ -205,13 +205,13 @@ desired arrays with the media graph elements.
 
        -  ``function``
 
-       -  Entity main function, see :ref:`media-entity-type` for details.
+       -  Entity main function, see :ref:`media-entity-functions` for details.
 
     -  .. row 4
 
        -  __u32
 
-       -  ``reserved``\ [12]
+       -  ``reserved``\ [6]
 
        -  Reserved for future extensions. Drivers and applications must set
          this array to zero.
@@ -334,7 +334,7 @@ desired arrays with the media graph elements.
 
        -  __u32
 
-       -  ``reserved``\ [9]
+       -  ``reserved``\ [5]
 
        -  Reserved for future extensions. Drivers and applications must set
          this array to zero.
@@ -390,7 +390,7 @@ desired arrays with the media graph elements.
 
        -  __u32
 
-       -  ``reserved``\ [5]
+       -  ``reserved``\ [6]
 
        -  Reserved for future extensions. Drivers and applications must set
          this array to zero.
index f92f10b7ffbd9a16a9a9bef787745555a76ee086..2dda14bd89b7d2608edb2223327ad1b5aa84236b 100644 (file)
@@ -7,11 +7,11 @@ Types and flags used to represent the media graph elements
 
 ..  tabularcolumns:: |p{8.2cm}|p{10.3cm}|
 
-.. _media-entity-type:
+.. _media-entity-functions:
 
 .. cssclass:: longtable
 
-.. flat-table:: Media entity types
+.. flat-table:: Media entity functions
     :header-rows:  0
     :stub-columns: 0
 
index d5f3eb6e674ac8602839a4b4bcd92c4da17376c8..03931f9b1285470d0406bfae636ba52ec5dae989 100644 (file)
@@ -3565,7 +3565,7 @@ enum v4l2_dv_it_content_type -
     HDMI carries 5V on one of the pins). This is often used to power an
     eeprom which contains EDID information, such that the source can
     read the EDID even if the sink is in standby/power off. Each bit
-    corresponds to an input pad on the transmitter. If an input pad
+    corresponds to an input pad on the receiver. If an input pad
     cannot detect whether power is present, then the bit for that pad
     will be 0. This read-only control is applicable to DVI-D, HDMI and
     DisplayPort connectors.
index 337e8188caf17485397a0f84444758ece7149174..ef52f637d8e9c86268bd0677ee110d31118d3d28 100644 (file)
@@ -55,12 +55,14 @@ describing all planes of that format.
       - ``pixelformat``
       - The pixel format. Both single- and multi-planar four character
        codes can be used.
-    * - enum :c:type:`v4l2_field`
+    * - __u32
       - ``field``
-      - See struct :c:type:`v4l2_pix_format`.
-    * - enum :c:type:`v4l2_colorspace`
+      - Field order, from enum :c:type:`v4l2_field`.
+        See struct :c:type:`v4l2_pix_format`.
+    * - __u32
       - ``colorspace``
-      - See struct :c:type:`v4l2_pix_format`.
+      - Colorspace encoding, from enum :c:type:`v4l2_colorspace`.
+        See struct :c:type:`v4l2_pix_format`.
     * - struct :c:type:`v4l2_plane_pix_format`
       - ``plane_fmt[VIDEO_MAX_PLANES]``
       - An array of structures describing format of each plane this pixel
@@ -73,24 +75,34 @@ describing all planes of that format.
     * - __u8
       - ``flags``
       - Flags set by the application or driver, see :ref:`format-flags`.
-    * - enum :c:type:`v4l2_ycbcr_encoding`
+    * - union {
+      - (anonymous)
+      -
+    * - __u8
       - ``ycbcr_enc``
-      - This information supplements the ``colorspace`` and must be set by
+      - Y'CbCr encoding, from enum :c:type:`v4l2_ycbcr_encoding`.
+        This information supplements the ``colorspace`` and must be set by
        the driver for capture streams and by the application for output
        streams, see :ref:`colorspaces`.
-    * - enum :c:type:`v4l2_hsv_encoding`
+    * - __u8
       - ``hsv_enc``
-      - This information supplements the ``colorspace`` and must be set by
+      - HSV encoding, from enum :c:type:`v4l2_hsv_encoding`.
+        This information supplements the ``colorspace`` and must be set by
        the driver for capture streams and by the application for output
        streams, see :ref:`colorspaces`.
-    * - enum :c:type:`v4l2_quantization`
+    * - }
+      -
+      -
+    * - __u8
       - ``quantization``
-      - This information supplements the ``colorspace`` and must be set by
+      - Quantization range, from enum :c:type:`v4l2_quantization`.
+        This information supplements the ``colorspace`` and must be set by
        the driver for capture streams and by the application for output
        streams, see :ref:`colorspaces`.
-    * - enum :c:type:`v4l2_xfer_func`
+    * - __u8
       - ``xfer_func``
-      - This information supplements the ``colorspace`` and must be set by
+      - Transfer function, from enum :c:type:`v4l2_xfer_func`.
+        This information supplements the ``colorspace`` and must be set by
        the driver for capture streams and by the application for output
        streams, see :ref:`colorspaces`.
     * - __u8
index 6622938c1b4126154965d328532c6dff74522fdb..826f2305da01ef93d322c25327af7da135c75cb6 100644 (file)
@@ -40,9 +40,10 @@ Single-planar format structure
        RGB formats in :ref:`rgb-formats`, YUV formats in
        :ref:`yuv-formats`, and reserved codes in
        :ref:`reserved-formats`
-    * - enum :c:type:`v4l2_field`
+    * - __u32
       - ``field``
-      - Video images are typically interlaced. Applications can request to
+      - Field order, from enum :c:type:`v4l2_field`.
+        Video images are typically interlaced. Applications can request to
        capture or output only the top or bottom field, or both fields
        interlaced or sequentially stored in one buffer or alternating in
        separate buffers. Drivers return the actual field order selected.
@@ -82,9 +83,10 @@ Single-planar format structure
        driver. Usually this is ``bytesperline`` times ``height``. When
        the image consists of variable length compressed data this is the
        maximum number of bytes required to hold an image.
-    * - enum :c:type:`v4l2_colorspace`
+    * - __u32
       - ``colorspace``
-      - This information supplements the ``pixelformat`` and must be set
+      - Image colorspace, from enum :c:type:`v4l2_colorspace`.
+        This information supplements the ``pixelformat`` and must be set
        by the driver for capture streams and by the application for
        output streams, see :ref:`colorspaces`.
     * - __u32
@@ -116,23 +118,33 @@ Single-planar format structure
     * - __u32
       - ``flags``
       - Flags set by the application or driver, see :ref:`format-flags`.
-    * - enum :c:type:`v4l2_ycbcr_encoding`
+    * - union {
+      - (anonymous)
+      -
+    * - __u32
       - ``ycbcr_enc``
-      - This information supplements the ``colorspace`` and must be set by
+      - Y'CbCr encoding, from enum :c:type:`v4l2_ycbcr_encoding`.
+        This information supplements the ``colorspace`` and must be set by
        the driver for capture streams and by the application for output
        streams, see :ref:`colorspaces`.
-    * - enum :c:type:`v4l2_hsv_encoding`
+    * - __u32
       - ``hsv_enc``
-      - This information supplements the ``colorspace`` and must be set by
+      - HSV encoding, from enum :c:type:`v4l2_hsv_encoding`.
+        This information supplements the ``colorspace`` and must be set by
        the driver for capture streams and by the application for output
        streams, see :ref:`colorspaces`.
-    * - enum :c:type:`v4l2_quantization`
+    * - }
+      -
+      -
+    * - __u32
       - ``quantization``
-      - This information supplements the ``colorspace`` and must be set by
+      - Quantization range, from enum :c:type:`v4l2_quantization`.
+        This information supplements the ``colorspace`` and must be set by
        the driver for capture streams and by the application for output
        streams, see :ref:`colorspaces`.
-    * - enum :c:type:`v4l2_xfer_func`
+    * - __u32
       - ``xfer_func``
-      - This information supplements the ``colorspace`` and must be set by
+      - Transfer function, from enum :c:type:`v4l2_xfer_func`.
+        This information supplements the ``colorspace`` and must be set by
        the driver for capture streams and by the application for output
        streams, see :ref:`colorspaces`.
index 26b106071364c8d6462c2fad0dbddfd3958d96b9..eb4b185d168c0524326793ea7152c02ee3793c82 100644 (file)
@@ -58,6 +58,14 @@ can never be transgressed.  If there is a good reason to go against the
 style (a line which becomes far less readable if split to fit within the
 80-column limit, for example), just do it.
 
+Note that you can also use the ``clang-format`` tool to help you with
+these rules, to quickly re-format parts of your code automatically,
+and to review full files in order to spot coding style mistakes,
+typos and possible improvements. It is also handy for sorting ``#includes``,
+for aligning variables/macros, for reflowing text and other similar tasks.
+See the file :ref:`Documentation/process/clang-format.rst <clangformat>`
+for more details.
+
 
 Abstraction layers
 ******************
diff --git a/Documentation/process/clang-format.rst b/Documentation/process/clang-format.rst
new file mode 100644 (file)
index 0000000..6710c07
--- /dev/null
@@ -0,0 +1,184 @@
+.. _clangformat:
+
+clang-format
+============
+
+``clang-format`` is a tool to format C/C++/... code according to
+a set of rules and heuristics. Like most tools, it is not perfect
+nor covers every single case, but it is good enough to be helpful.
+
+``clang-format`` can be used for several purposes:
+
+  - Quickly reformat a block of code to the kernel style. Especially useful
+    when moving code around and aligning/sorting. See clangformatreformat_.
+
+  - Spot style mistakes, typos and possible improvements in files
+    you maintain, patches you review, diffs, etc. See clangformatreview_.
+
+  - Help you follow the coding style rules, especially useful for those
+    new to kernel development or working at the same time in several
+    projects with different coding styles.
+
+Its configuration file is ``.clang-format`` in the root of the kernel tree.
+The rules contained there try to approximate the most common kernel
+coding style. They also try to follow :ref:`Documentation/process/coding-style.rst <codingstyle>`
+as much as possible. Since not all the kernel follows the same style,
+it is possible that you may want to tweak the defaults for a particular
+subsystem or folder. To do so, you can override the defaults by writing
+another ``.clang-format`` file in a subfolder.
+
+The tool itself has already been included in the repositories of popular
+Linux distributions for a long time. Search for ``clang-format`` in
+your repositories. Otherwise, you can either download pre-built
+LLVM/clang binaries or build the source code from:
+
+    http://releases.llvm.org/download.html
+
+See more information about the tool at:
+
+    https://clang.llvm.org/docs/ClangFormat.html
+
+    https://clang.llvm.org/docs/ClangFormatStyleOptions.html
+
+
+.. _clangformatreview:
+
+Review files and patches for coding style
+-----------------------------------------
+
+By running the tool in its inline mode, you can review full subsystems,
+folders or individual files for code style mistakes, typos or improvements.
+
+To do so, you can run something like::
+
+    # Make sure your working directory is clean!
+    clang-format -i kernel/*.[ch]
+
+And then take a look at the git diff.
+
+Counting the lines of such a diff is also useful for improving/tweaking
+the style options in the configuration file; as well as testing new
+``clang-format`` features/versions.
+
+``clang-format`` also supports reading unified diffs, so you can review
+patches and git diffs easily. See the documentation at:
+
+    https://clang.llvm.org/docs/ClangFormat.html#script-for-patch-reformatting
+
+To avoid ``clang-format`` formatting some portion of a file, you can do::
+
+    int formatted_code;
+    // clang-format off
+        void    unformatted_code  ;
+    // clang-format on
+    void formatted_code_again;
+
+While it might be tempting to use this to keep a file always in sync with
+``clang-format``, especially if you are writing new files or if you are
+a maintainer, please note that people might be running different
+``clang-format`` versions or not have it available at all. Therefore,
+you should probably refrain from using this in kernel sources;
+at least until we see if ``clang-format`` becomes commonplace.
+
+
+.. _clangformatreformat:
+
+Reformatting blocks of code
+---------------------------
+
+By using an integration with your text editor, you can reformat arbitrary
+blocks (selections) of code with a single keystroke. This is especially
+useful when moving code around, for complex code that is deeply indented,
+for multi-line macros (and aligning their backslashes), etc.
+
+Remember that you can always tweak the changes afterwards in those cases
+where the tool did not do an optimal job. But as a first approximation,
+it can be very useful.
+
+There are integrations for many popular text editors. For some of them,
+like vim, emacs, BBEdit and Visual Studio you can find support built-in.
+For instructions, read the appropriate section at:
+
+    https://clang.llvm.org/docs/ClangFormat.html
+
+For Atom, Eclipse, Sublime Text, Visual Studio Code, XCode and other
+editors and IDEs you should be able to find ready-to-use plugins.
+
+For this use case, consider using a secondary ``.clang-format``
+so that you can tweak a few options. See clangformatextra_.
+
+
+.. _clangformatmissing:
+
+Missing support
+---------------
+
+``clang-format`` is missing support for some things that are common
+in kernel code. They are easy to remember, so if you use the tool
+regularly, you will quickly learn to avoid/ignore those.
+
+In particular, some very common ones you will notice are:
+
+  - Aligned blocks of one-line ``#defines``, e.g.::
+
+        #define TRACING_MAP_BITS_DEFAULT       11
+        #define TRACING_MAP_BITS_MAX           17
+        #define TRACING_MAP_BITS_MIN           7
+
+    vs.::
+
+        #define TRACING_MAP_BITS_DEFAULT 11
+        #define TRACING_MAP_BITS_MAX 17
+        #define TRACING_MAP_BITS_MIN 7
+
+  - Aligned designated initializers, e.g.::
+
+        static const struct file_operations uprobe_events_ops = {
+                .owner          = THIS_MODULE,
+                .open           = probes_open,
+                .read           = seq_read,
+                .llseek         = seq_lseek,
+                .release        = seq_release,
+                .write          = probes_write,
+        };
+
+    vs.::
+
+        static const struct file_operations uprobe_events_ops = {
+                .owner = THIS_MODULE,
+                .open = probes_open,
+                .read = seq_read,
+                .llseek = seq_lseek,
+                .release = seq_release,
+                .write = probes_write,
+        };
+
+
+.. _clangformatextra:
+
+Extra features/options
+----------------------
+
+Some features/style options are not enabled by default in the configuration
+file in order to minimize the differences between the output and the current
+code. In other words, to make the difference as small as possible,
+which makes reviewing full-file style, as well as diffs and patches, as easy
+as possible.
+
+In other cases (e.g. particular subsystems/folders/files), the kernel style
+might be different and enabling some of these options may approximate
+better the style there.
+
+For instance:
+
+  - Aligning assignments (``AlignConsecutiveAssignments``).
+
+  - Aligning declarations (``AlignConsecutiveDeclarations``).
+
+  - Reflowing text in comments (``ReflowComments``).
+
+  - Sorting ``#includes`` (``SortIncludes``).
+
+They are typically useful for block re-formatting, rather than full-file.
+You might want to create another ``.clang-format`` file and use that one
+from your editor/IDE instead.
index d98deb62c4002c9341589a8416a78d81b304ae60..4e7c0a1c427a9ae8d4bee3b0796c0ec5363c293e 100644 (file)
@@ -631,6 +631,14 @@ options ``-kr -i8`` (stands for ``K&R, 8 character indents``), or use
 re-formatting you may want to take a look at the man page.  But
 remember: ``indent`` is not a fix for bad programming.
 
+Note that you can also use the ``clang-format`` tool to help you with
+these rules, to quickly re-format parts of your code automatically,
+and to review full files in order to spot coding style mistakes,
+typos and possible improvements. It is also handy for sorting ``#includes``,
+for aligning variables/macros, for reflowing text and other similar tasks.
+See the file :ref:`Documentation/process/clang-format.rst <clangformat>`
+for more details.
+
 
 10) Kconfig configuration files
 -------------------------------
index 412314eebda66552331e235b7bdbc3210115b978..eded671d55eb2149ae35c7a005010176646f8194 100644 (file)
@@ -964,32 +964,34 @@ detect a hard lockup condition.
 
 tainted:
 
-Non-zero if the kernel has been tainted.  Numeric values, which
-can be ORed together:
-
-   1 - A module with a non-GPL license has been loaded, this
-       includes modules with no license.
-       Set by modutils >= 2.4.9 and module-init-tools.
-   2 - A module was force loaded by insmod -f.
-       Set by modutils >= 2.4.9 and module-init-tools.
-   4 - Unsafe SMP processors: SMP with CPUs not designed for SMP.
-   8 - A module was forcibly unloaded from the system by rmmod -f.
-  16 - A hardware machine check error occurred on the system.
-  32 - A bad page was discovered on the system.
-  64 - The user has asked that the system be marked "tainted".  This
-       could be because they are running software that directly modifies
-       the hardware, or for other reasons.
- 128 - The system has died.
- 256 - The ACPI DSDT has been overridden with one supplied by the user
-        instead of using the one provided by the hardware.
- 512 - A kernel warning has occurred.
-1024 - A module from drivers/staging was loaded.
-2048 - The system is working around a severe firmware bug.
-4096 - An out-of-tree module has been loaded.
-8192 - An unsigned module has been loaded in a kernel supporting module
-       signature.
-16384 - A soft lockup has previously occurred on the system.
-32768 - The kernel has been live patched.
+Non-zero if the kernel has been tainted. Numeric values, which can be
+ORed together. The letters are seen in "Tainted" line of Oops reports.
+
+     1 (P):  A module with a non-GPL license has been loaded, this
+             includes modules with no license.
+             Set by modutils >= 2.4.9 and module-init-tools.
+     2 (F): A module was force loaded by insmod -f.
+            Set by modutils >= 2.4.9 and module-init-tools.
+     4 (S): Unsafe SMP processors: SMP with CPUs not designed for SMP.
+     8 (R): A module was forcibly unloaded from the system by rmmod -f.
+    16 (M): A hardware machine check error occurred on the system.
+    32 (B): A bad page was discovered on the system.
+    64 (U): The user has asked that the system be marked "tainted". This
+            could be because they are running software that directly modifies
+            the hardware, or for other reasons.
+   128 (D): The system has died.
+   256 (A): The ACPI DSDT has been overridden with one supplied by the user
+            instead of using the one provided by the hardware.
+   512 (W): A kernel warning has occurred.
+  1024 (C): A module from drivers/staging was loaded.
+  2048 (I): The system is working around a severe firmware bug.
+  4096 (O): An out-of-tree module has been loaded.
+  8192 (E): An unsigned module has been loaded in a kernel supporting module
+            signature.
+ 16384 (L): A soft lockup has previously occurred on the system.
+ 32768 (K): The kernel has been live patched.
+ 65536 (X): Auxiliary taint, defined and used by distros.
+131072 (T): The kernel was built with the struct randomization plugin.
 
 ==============================================================
 
index ff234d229cbbc581ec53a41dcc7725565d0747d4..17256f2ad919aa99cd2271eb8b05d09d5c8e2b62 100644 (file)
@@ -312,8 +312,6 @@ The lowmem_reserve_ratio is an array. You can see them by reading this file.
 % cat /proc/sys/vm/lowmem_reserve_ratio
 256     256     32
 -
-Note: # of this elements is one fewer than number of zones. Because the highest
-      zone's value is not necessary for following calculation.
 
 But, these values are not used directly. The kernel calculates # of protection
 pages for each zones from them. These are shown as array of protection pages
@@ -364,7 +362,8 @@ As above expression, they are reciprocal number of ratio.
 pages of higher zones on the node.
 
 If you would like to protect more pages, smaller values are effective.
-The minimum value is 1 (1/1 -> 100%).
+The minimum value is 1 (1/1 -> 100%). A value less than 1 completely
+disables protection of the pages.
 
 ==============================================================
 
index bdf1963ba6baaf2841a8ae0dcafccf1e8dead49a..a5ea2cb0082bed22806784a45a576cd7ed8f1204 100644 (file)
@@ -520,1550 +520,4 @@ The following commands are supported:
   totals derived from one or more trace event format fields and/or
   event counts (hitcount).
 
-  The format of a hist trigger is as follows::
-
-        hist:keys=<field1[,field2,...]>[:values=<field1[,field2,...]>]
-          [:sort=<field1[,field2,...]>][:size=#entries][:pause][:continue]
-          [:clear][:name=histname1] [if <filter>]
-
-  When a matching event is hit, an entry is added to a hash table
-  using the key(s) and value(s) named.  Keys and values correspond to
-  fields in the event's format description.  Values must correspond to
-  numeric fields - on an event hit, the value(s) will be added to a
-  sum kept for that field.  The special string 'hitcount' can be used
-  in place of an explicit value field - this is simply a count of
-  event hits.  If 'values' isn't specified, an implicit 'hitcount'
-  value will be automatically created and used as the only value.
-  Keys can be any field, or the special string 'stacktrace', which
-  will use the event's kernel stacktrace as the key.  The keywords
-  'keys' or 'key' can be used to specify keys, and the keywords
-  'values', 'vals', or 'val' can be used to specify values.  Compound
-  keys consisting of up to two fields can be specified by the 'keys'
-  keyword.  Hashing a compound key produces a unique entry in the
-  table for each unique combination of component keys, and can be
-  useful for providing more fine-grained summaries of event data.
-  Additionally, sort keys consisting of up to two fields can be
-  specified by the 'sort' keyword.  If more than one field is
-  specified, the result will be a 'sort within a sort': the first key
-  is taken to be the primary sort key and the second the secondary
-  key.  If a hist trigger is given a name using the 'name' parameter,
-  its histogram data will be shared with other triggers of the same
-  name, and trigger hits will update this common data.  Only triggers
-  with 'compatible' fields can be combined in this way; triggers are
-  'compatible' if the fields named in the trigger share the same
-  number and type of fields and those fields also have the same names.
-  Note that any two events always share the compatible 'hitcount' and
-  'stacktrace' fields and can therefore be combined using those
-  fields, however pointless that may be.
-
-  'hist' triggers add a 'hist' file to each event's subdirectory.
-  Reading the 'hist' file for the event will dump the hash table in
-  its entirety to stdout.  If there are multiple hist triggers
-  attached to an event, there will be a table for each trigger in the
-  output.  The table displayed for a named trigger will be the same as
-  any other instance having the same name. Each printed hash table
-  entry is a simple list of the keys and values comprising the entry;
-  keys are printed first and are delineated by curly braces, and are
-  followed by the set of value fields for the entry.  By default,
-  numeric fields are displayed as base-10 integers.  This can be
-  modified by appending any of the following modifiers to the field
-  name:
-
-        - .hex        display a number as a hex value
-       - .sym        display an address as a symbol
-       - .sym-offset display an address as a symbol and offset
-       - .syscall    display a syscall id as a system call name
-       - .execname   display a common_pid as a program name
-
-  Note that in general the semantics of a given field aren't
-  interpreted when applying a modifier to it, but there are some
-  restrictions to be aware of in this regard:
-
-    - only the 'hex' modifier can be used for values (because values
-      are essentially sums, and the other modifiers don't make sense
-      in that context).
-    - the 'execname' modifier can only be used on a 'common_pid'.  The
-      reason for this is that the execname is simply the 'comm' value
-      saved for the 'current' process when an event was triggered,
-      which is the same as the common_pid value saved by the event
-      tracing code.  Trying to apply that comm value to other pid
-      values wouldn't be correct, and typically events that care save
-      pid-specific comm fields in the event itself.
-
-  A typical usage scenario would be the following to enable a hist
-  trigger, read its current contents, and then turn it off::
-
-         # echo 'hist:keys=skbaddr.hex:vals=len' > \
-           /sys/kernel/debug/tracing/events/net/netif_rx/trigger
-
-         # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
-
-         # echo '!hist:keys=skbaddr.hex:vals=len' > \
-           /sys/kernel/debug/tracing/events/net/netif_rx/trigger
-
-  The trigger file itself can be read to show the details of the
-  currently attached hist trigger.  This information is also displayed
-  at the top of the 'hist' file when read.
-
-  By default, the size of the hash table is 2048 entries.  The 'size'
-  parameter can be used to specify more or fewer than that.  The units
-  are in terms of hashtable entries - if a run uses more entries than
-  specified, the results will show the number of 'drops', the number
-  of hits that were ignored.  The size should be a power of 2 between
-  128 and 131072 (any non- power-of-2 number specified will be rounded
-  up).
-
-  The 'sort' parameter can be used to specify a value field to sort
-  on.  The default if unspecified is 'hitcount' and the default sort
-  order is 'ascending'.  To sort in the opposite direction, append
-  .descending' to the sort key.
-
-  The 'pause' parameter can be used to pause an existing hist trigger
-  or to start a hist trigger but not log any events until told to do
-  so.  'continue' or 'cont' can be used to start or restart a paused
-  hist trigger.
-
-  The 'clear' parameter will clear the contents of a running hist
-  trigger and leave its current paused/active state.
-
-  Note that the 'pause', 'cont', and 'clear' parameters should be
-  applied using 'append' shell operator ('>>') if applied to an
-  existing trigger, rather than via the '>' operator, which will cause
-  the trigger to be removed through truncation.
-
-- enable_hist/disable_hist
-
-  The enable_hist and disable_hist triggers can be used to have one
-  event conditionally start and stop another event's already-attached
-  hist trigger.  Any number of enable_hist and disable_hist triggers
-  can be attached to a given event, allowing that event to kick off
-  and stop aggregations on a host of other events.
-
-  The format is very similar to the enable/disable_event triggers::
-
-      enable_hist:<system>:<event>[:count]
-      disable_hist:<system>:<event>[:count]
-
-  Instead of enabling or disabling the tracing of the target event
-  into the trace buffer as the enable/disable_event triggers do, the
-  enable/disable_hist triggers enable or disable the aggregation of
-  the target event into a hash table.
-
-  A typical usage scenario for the enable_hist/disable_hist triggers
-  would be to first set up a paused hist trigger on some event,
-  followed by an enable_hist/disable_hist pair that turns the hist
-  aggregation on and off when conditions of interest are hit::
-
-         # echo 'hist:keys=skbaddr.hex:vals=len:pause' > \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-
-         # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
-
-         # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
-
-  The above sets up an initially paused hist trigger which is unpaused
-  and starts aggregating events when a given program is executed, and
-  which stops aggregating when the process exits and the hist trigger
-  is paused again.
-
-  The examples below provide a more concrete illustration of the
-  concepts and typical usage patterns discussed above.
-
-
-6.2 'hist' trigger examples
----------------------------
-
-  The first set of examples creates aggregations using the kmalloc
-  event.  The fields that can be used for the hist trigger are listed
-  in the kmalloc event's format file::
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/format
-    name: kmalloc
-    ID: 374
-    format:
-       field:unsigned short common_type;       offset:0;       size:2; signed:0;
-       field:unsigned char common_flags;       offset:2;       size:1; signed:0;
-       field:unsigned char common_preempt_count;               offset:3;       size:1; signed:0;
-       field:int common_pid;                                   offset:4;       size:4; signed:1;
-
-       field:unsigned long call_site;                          offset:8;       size:8; signed:0;
-       field:const void * ptr;                                 offset:16;      size:8; signed:0;
-       field:size_t bytes_req;                                 offset:24;      size:8; signed:0;
-       field:size_t bytes_alloc;                               offset:32;      size:8; signed:0;
-       field:gfp_t gfp_flags;                                  offset:40;      size:4; signed:0;
-
-  We'll start by creating a hist trigger that generates a simple table
-  that lists the total number of bytes requested for each function in
-  the kernel that made one or more calls to kmalloc::
-
-    # echo 'hist:key=call_site:val=bytes_req' > \
-            /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-  This tells the tracing system to create a 'hist' trigger using the
-  call_site field of the kmalloc event as the key for the table, which
-  just means that each unique call_site address will have an entry
-  created for it in the table.  The 'val=bytes_req' parameter tells
-  the hist trigger that for each unique entry (call_site) in the
-  table, it should keep a running total of the number of bytes
-  requested by that call_site.
-
-  We'll let it run for awhile and then dump the contents of the 'hist'
-  file in the kmalloc event's subdirectory (for readability, a number
-  of entries have been omitted)::
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active]
-
-    { call_site: 18446744072106379007 } hitcount:          1  bytes_req:        176
-    { call_site: 18446744071579557049 } hitcount:          1  bytes_req:       1024
-    { call_site: 18446744071580608289 } hitcount:          1  bytes_req:      16384
-    { call_site: 18446744071581827654 } hitcount:          1  bytes_req:         24
-    { call_site: 18446744071580700980 } hitcount:          1  bytes_req:          8
-    { call_site: 18446744071579359876 } hitcount:          1  bytes_req:        152
-    { call_site: 18446744071580795365 } hitcount:          3  bytes_req:        144
-    { call_site: 18446744071581303129 } hitcount:          3  bytes_req:        144
-    { call_site: 18446744071580713234 } hitcount:          4  bytes_req:       2560
-    { call_site: 18446744071580933750 } hitcount:          4  bytes_req:        736
-    .
-    .
-    .
-    { call_site: 18446744072106047046 } hitcount:         69  bytes_req:       5576
-    { call_site: 18446744071582116407 } hitcount:         73  bytes_req:       2336
-    { call_site: 18446744072106054684 } hitcount:        136  bytes_req:     140504
-    { call_site: 18446744072106224230 } hitcount:        136  bytes_req:      19584
-    { call_site: 18446744072106078074 } hitcount:        153  bytes_req:       2448
-    { call_site: 18446744072106062406 } hitcount:        153  bytes_req:      36720
-    { call_site: 18446744071582507929 } hitcount:        153  bytes_req:      37088
-    { call_site: 18446744072102520590 } hitcount:        273  bytes_req:      10920
-    { call_site: 18446744071582143559 } hitcount:        358  bytes_req:        716
-    { call_site: 18446744072106465852 } hitcount:        417  bytes_req:      56712
-    { call_site: 18446744072102523378 } hitcount:        485  bytes_req:      27160
-    { call_site: 18446744072099568646 } hitcount:       1676  bytes_req:      33520
-
-    Totals:
-        Hits: 4610
-        Entries: 45
-        Dropped: 0
-
-  The output displays a line for each entry, beginning with the key
-  specified in the trigger, followed by the value(s) also specified in
-  the trigger.  At the beginning of the output is a line that displays
-  the trigger info, which can also be displayed by reading the
-  'trigger' file::
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-    hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active]
-
-  At the end of the output are a few lines that display the overall
-  totals for the run.  The 'Hits' field shows the total number of
-  times the event trigger was hit, the 'Entries' field shows the total
-  number of used entries in the hash table, and the 'Dropped' field
-  shows the number of hits that were dropped because the number of
-  used entries for the run exceeded the maximum number of entries
-  allowed for the table (normally 0, but if not a hint that you may
-  want to increase the size of the table using the 'size' parameter).
-
-  Notice in the above output that there's an extra field, 'hitcount',
-  which wasn't specified in the trigger.  Also notice that in the
-  trigger info output, there's a parameter, 'sort=hitcount', which
-  wasn't specified in the trigger either.  The reason for that is that
-  every trigger implicitly keeps a count of the total number of hits
-  attributed to a given entry, called the 'hitcount'.  That hitcount
-  information is explicitly displayed in the output, and in the
-  absence of a user-specified sort parameter, is used as the default
-  sort field.
-
-  The value 'hitcount' can be used in place of an explicit value in
-  the 'values' parameter if you don't really need to have any
-  particular field summed and are mainly interested in hit
-  frequencies.
-
-  To turn the hist trigger off, simply call up the trigger in the
-  command history and re-execute it with a '!' prepended::
-
-    # echo '!hist:key=call_site:val=bytes_req' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-  Finally, notice that the call_site as displayed in the output above
-  isn't really very useful.  It's an address, but normally addresses
-  are displayed in hex.  To have a numeric field displayed as a hex
-  value, simply append '.hex' to the field name in the trigger::
-
-    # echo 'hist:key=call_site.hex:val=bytes_req' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=call_site.hex:vals=bytes_req:sort=hitcount:size=2048 [active]
-
-    { call_site: ffffffffa026b291 } hitcount:          1  bytes_req:        433
-    { call_site: ffffffffa07186ff } hitcount:          1  bytes_req:        176
-    { call_site: ffffffff811ae721 } hitcount:          1  bytes_req:      16384
-    { call_site: ffffffff811c5134 } hitcount:          1  bytes_req:          8
-    { call_site: ffffffffa04a9ebb } hitcount:          1  bytes_req:        511
-    { call_site: ffffffff8122e0a6 } hitcount:          1  bytes_req:         12
-    { call_site: ffffffff8107da84 } hitcount:          1  bytes_req:        152
-    { call_site: ffffffff812d8246 } hitcount:          1  bytes_req:         24
-    { call_site: ffffffff811dc1e5 } hitcount:          3  bytes_req:        144
-    { call_site: ffffffffa02515e8 } hitcount:          3  bytes_req:        648
-    { call_site: ffffffff81258159 } hitcount:          3  bytes_req:        144
-    { call_site: ffffffff811c80f4 } hitcount:          4  bytes_req:        544
-    .
-    .
-    .
-    { call_site: ffffffffa06c7646 } hitcount:        106  bytes_req:       8024
-    { call_site: ffffffffa06cb246 } hitcount:        132  bytes_req:      31680
-    { call_site: ffffffffa06cef7a } hitcount:        132  bytes_req:       2112
-    { call_site: ffffffff8137e399 } hitcount:        132  bytes_req:      23232
-    { call_site: ffffffffa06c941c } hitcount:        185  bytes_req:     171360
-    { call_site: ffffffffa06f2a66 } hitcount:        185  bytes_req:      26640
-    { call_site: ffffffffa036a70e } hitcount:        265  bytes_req:      10600
-    { call_site: ffffffff81325447 } hitcount:        292  bytes_req:        584
-    { call_site: ffffffffa072da3c } hitcount:        446  bytes_req:      60656
-    { call_site: ffffffffa036b1f2 } hitcount:        526  bytes_req:      29456
-    { call_site: ffffffffa0099c06 } hitcount:       1780  bytes_req:      35600
-
-    Totals:
-        Hits: 4775
-        Entries: 46
-        Dropped: 0
-
-  Even that's only marginally more useful - while hex values do look
-  more like addresses, what users are typically more interested in
-  when looking at text addresses are the corresponding symbols
-  instead.  To have an address displayed as symbolic value instead,
-  simply append '.sym' or '.sym-offset' to the field name in the
-  trigger::
-
-    # echo 'hist:key=call_site.sym:val=bytes_req' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=hitcount:size=2048 [active]
-
-    { call_site: [ffffffff810adcb9] syslog_print_all                              } hitcount:          1  bytes_req:       1024
-    { call_site: [ffffffff8154bc62] usb_control_msg                               } hitcount:          1  bytes_req:          8
-    { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid]                      } hitcount:          1  bytes_req:          7
-    { call_site: [ffffffff8154acbe] usb_alloc_urb                                 } hitcount:          1  bytes_req:        192
-    { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid]                     } hitcount:          1  bytes_req:          7
-    { call_site: [ffffffff811e3a25] __seq_open_private                            } hitcount:          1  bytes_req:         40
-    { call_site: [ffffffff8109524a] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128
-    { call_site: [ffffffff811febd5] fsnotify_alloc_group                          } hitcount:          2  bytes_req:        528
-    { call_site: [ffffffff81440f58] __tty_buffer_request_room                     } hitcount:          2  bytes_req:       2624
-    { call_site: [ffffffff81200ba6] inotify_new_group                             } hitcount:          2  bytes_req:         96
-    { call_site: [ffffffffa05e19af] ieee80211_start_tx_ba_session [mac80211]      } hitcount:          2  bytes_req:        464
-    { call_site: [ffffffff81672406] tcp_get_metrics                               } hitcount:          2  bytes_req:        304
-    { call_site: [ffffffff81097ec2] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128
-    { call_site: [ffffffff81089b05] sched_create_group                            } hitcount:          2  bytes_req:       1424
-    .
-    .
-    .
-    { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:       1185  bytes_req:     123240
-    { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm]                } hitcount:       1185  bytes_req:     104280
-    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:       1402  bytes_req:     190672
-    { call_site: [ffffffff812891ca] ext4_find_extent                              } hitcount:       1518  bytes_req:     146208
-    { call_site: [ffffffffa029070e] drm_vma_node_allow [drm]                      } hitcount:       1746  bytes_req:      69840
-    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       2021  bytes_req:     792312
-    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       2592  bytes_req:     145152
-    { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       2629  bytes_req:     378576
-    { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       2629  bytes_req:    3783248
-    { call_site: [ffffffff81325607] apparmor_file_alloc_security                  } hitcount:       5192  bytes_req:      10384
-    { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       5529  bytes_req:     110584
-    { call_site: [ffffffff8131ebf7] aa_alloc_task_context                         } hitcount:      21943  bytes_req:     702176
-    { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:      55759  bytes_req:    5074265
-
-    Totals:
-        Hits: 109928
-        Entries: 71
-        Dropped: 0
-
-  Because the default sort key above is 'hitcount', the above shows a
-  the list of call_sites by increasing hitcount, so that at the bottom
-  we see the functions that made the most kmalloc calls during the
-  run.  If instead we we wanted to see the top kmalloc callers in
-  terms of the number of bytes requested rather than the number of
-  calls, and we wanted the top caller to appear at the top, we can use
-  the 'sort' parameter, along with the 'descending' modifier::
-
-    # echo 'hist:key=call_site.sym:val=bytes_req:sort=bytes_req.descending' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=bytes_req.descending:size=2048 [active]
-
-    { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       2186  bytes_req:    3397464
-    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       1790  bytes_req:     712176
-    { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:       8132  bytes_req:     513135
-    { call_site: [ffffffff811e2a1b] seq_buf_alloc                                 } hitcount:        106  bytes_req:     440128
-    { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       2186  bytes_req:     314784
-    { call_site: [ffffffff812891ca] ext4_find_extent                              } hitcount:       2174  bytes_req:     208992
-    { call_site: [ffffffff811ae8e1] __kmalloc                                     } hitcount:          8  bytes_req:     131072
-    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:        859  bytes_req:     116824
-    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       1834  bytes_req:     102704
-    { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:        972  bytes_req:     101088
-    { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm]                } hitcount:        972  bytes_req:      85536
-    { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       3333  bytes_req:      66664
-    { call_site: [ffffffff8137e559] sg_kmalloc                                    } hitcount:        209  bytes_req:      61632
-    .
-    .
-    .
-    { call_site: [ffffffff81095225] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128
-    { call_site: [ffffffff81097ec2] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128
-    { call_site: [ffffffff812d8406] copy_semundo                                  } hitcount:          2  bytes_req:         48
-    { call_site: [ffffffff81200ba6] inotify_new_group                             } hitcount:          1  bytes_req:         48
-    { call_site: [ffffffffa027121a] drm_getmagic [drm]                            } hitcount:          1  bytes_req:         48
-    { call_site: [ffffffff811e3a25] __seq_open_private                            } hitcount:          1  bytes_req:         40
-    { call_site: [ffffffff811c52f4] bprm_change_interp                            } hitcount:          2  bytes_req:         16
-    { call_site: [ffffffff8154bc62] usb_control_msg                               } hitcount:          1  bytes_req:          8
-    { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid]                     } hitcount:          1  bytes_req:          7
-    { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid]                      } hitcount:          1  bytes_req:          7
-
-    Totals:
-        Hits: 32133
-        Entries: 81
-        Dropped: 0
-
-  To display the offset and size information in addition to the symbol
-  name, just use 'sym-offset' instead::
-
-    # echo 'hist:key=call_site.sym-offset:val=bytes_req:sort=bytes_req.descending' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=call_site.sym-offset:vals=bytes_req:sort=bytes_req.descending:size=2048 [active]
-
-    { call_site: [ffffffffa046041c] i915_gem_execbuffer2+0x6c/0x2c0 [i915]                  } hitcount:       4569  bytes_req:    3163720
-    { call_site: [ffffffffa0489a66] intel_ring_begin+0xc6/0x1f0 [i915]                      } hitcount:       4569  bytes_req:     657936
-    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23+0x694/0x1020 [i915]      } hitcount:       1519  bytes_req:     472936
-    { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23+0x516/0x1020 [i915]      } hitcount:       3050  bytes_req:     211832
-    { call_site: [ffffffff811e2a1b] seq_buf_alloc+0x1b/0x50                                 } hitcount:         34  bytes_req:     148384
-    { call_site: [ffffffffa04a580c] intel_crtc_page_flip+0xbc/0x870 [i915]                  } hitcount:       1385  bytes_req:     144040
-    { call_site: [ffffffff811ae8e1] __kmalloc+0x191/0x1b0                                   } hitcount:          8  bytes_req:     131072
-    { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl+0x282/0x360 [drm]              } hitcount:       1385  bytes_req:     121880
-    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc+0x32/0x100 [drm]                  } hitcount:       1848  bytes_req:     103488
-    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state+0x2c/0xa0 [i915]            } hitcount:        461  bytes_req:      62696
-    { call_site: [ffffffffa029070e] drm_vma_node_allow+0x2e/0xd0 [drm]                      } hitcount:       1541  bytes_req:      61640
-    { call_site: [ffffffff815f8d7b] sk_prot_alloc+0xcb/0x1b0                                } hitcount:         57  bytes_req:      57456
-    .
-    .
-    .
-    { call_site: [ffffffff8109524a] alloc_fair_sched_group+0x5a/0x1a0                       } hitcount:          2  bytes_req:        128
-    { call_site: [ffffffffa027b921] drm_vm_open_locked+0x31/0xa0 [drm]                      } hitcount:          3  bytes_req:         96
-    { call_site: [ffffffff8122e266] proc_self_follow_link+0x76/0xb0                         } hitcount:          8  bytes_req:         96
-    { call_site: [ffffffff81213e80] load_elf_binary+0x240/0x1650                            } hitcount:          3  bytes_req:         84
-    { call_site: [ffffffff8154bc62] usb_control_msg+0x42/0x110                              } hitcount:          1  bytes_req:          8
-    { call_site: [ffffffffa00bf6fe] hidraw_send_report+0x7e/0x1a0 [hid]                     } hitcount:          1  bytes_req:          7
-    { call_site: [ffffffffa00bf1ca] hidraw_report_event+0x8a/0x120 [hid]                    } hitcount:          1  bytes_req:          7
-
-    Totals:
-        Hits: 26098
-        Entries: 64
-        Dropped: 0
-
-  We can also add multiple fields to the 'values' parameter.  For
-  example, we might want to see the total number of bytes allocated
-  alongside bytes requested, and display the result sorted by bytes
-  allocated in a descending order::
-
-    # echo 'hist:keys=call_site.sym:values=bytes_req,bytes_alloc:sort=bytes_alloc.descending' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=call_site.sym:vals=bytes_req,bytes_alloc:sort=bytes_alloc.descending:size=2048 [active]
-
-    { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       7403  bytes_req:    4084360  bytes_alloc:    5958016
-    { call_site: [ffffffff811e2a1b] seq_buf_alloc                                 } hitcount:        541  bytes_req:    2213968  bytes_alloc:    2228224
-    { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       7404  bytes_req:    1066176  bytes_alloc:    1421568
-    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       1565  bytes_req:     557368  bytes_alloc:    1037760
-    { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:       9557  bytes_req:     595778  bytes_alloc:     695744
-    { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       5839  bytes_req:     430680  bytes_alloc:     470400
-    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:       2388  bytes_req:     324768  bytes_alloc:     458496
-    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       3911  bytes_req:     219016  bytes_alloc:     250304
-    { call_site: [ffffffff815f8d7b] sk_prot_alloc                                 } hitcount:        235  bytes_req:     236880  bytes_alloc:     240640
-    { call_site: [ffffffff8137e559] sg_kmalloc                                    } hitcount:        557  bytes_req:     169024  bytes_alloc:     221760
-    { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       9378  bytes_req:     187548  bytes_alloc:     206312
-    { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:       1519  bytes_req:     157976  bytes_alloc:     194432
-    .
-    .
-    .
-    { call_site: [ffffffff8109bd3b] sched_autogroup_create_attach                 } hitcount:          2  bytes_req:        144  bytes_alloc:        192
-    { call_site: [ffffffff81097ee8] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128  bytes_alloc:        128
-    { call_site: [ffffffff8109524a] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128  bytes_alloc:        128
-    { call_site: [ffffffff81095225] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128  bytes_alloc:        128
-    { call_site: [ffffffff81097ec2] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128  bytes_alloc:        128
-    { call_site: [ffffffff81213e80] load_elf_binary                               } hitcount:          3  bytes_req:         84  bytes_alloc:         96
-    { call_site: [ffffffff81079a2e] kthread_create_on_node                        } hitcount:          1  bytes_req:         56  bytes_alloc:         64
-    { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid]                      } hitcount:          1  bytes_req:          7  bytes_alloc:          8
-    { call_site: [ffffffff8154bc62] usb_control_msg                               } hitcount:          1  bytes_req:          8  bytes_alloc:          8
-    { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid]                     } hitcount:          1  bytes_req:          7  bytes_alloc:          8
-
-    Totals:
-        Hits: 66598
-        Entries: 65
-        Dropped: 0
-
-  Finally, to finish off our kmalloc example, instead of simply having
-  the hist trigger display symbolic call_sites, we can have the hist
-  trigger additionally display the complete set of kernel stack traces
-  that led to each call_site.  To do that, we simply use the special
-  value 'stacktrace' for the key parameter::
-
-    # echo 'hist:keys=stacktrace:values=bytes_req,bytes_alloc:sort=bytes_alloc' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-  The above trigger will use the kernel stack trace in effect when an
-  event is triggered as the key for the hash table.  This allows the
-  enumeration of every kernel callpath that led up to a particular
-  event, along with a running total of any of the event fields for
-  that event.  Here we tally bytes requested and bytes allocated for
-  every callpath in the system that led up to a kmalloc (in this case
-  every callpath to a kmalloc for a kernel compile)::
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=stacktrace:vals=bytes_req,bytes_alloc:sort=bytes_alloc:size=2048 [active]
-
-    { stacktrace:
-         __kmalloc_track_caller+0x10b/0x1a0
-         kmemdup+0x20/0x50
-         hidraw_report_event+0x8a/0x120 [hid]
-         hid_report_raw_event+0x3ea/0x440 [hid]
-         hid_input_report+0x112/0x190 [hid]
-         hid_irq_in+0xc2/0x260 [usbhid]
-         __usb_hcd_giveback_urb+0x72/0x120
-         usb_giveback_urb_bh+0x9e/0xe0
-         tasklet_hi_action+0xf8/0x100
-         __do_softirq+0x114/0x2c0
-         irq_exit+0xa5/0xb0
-         do_IRQ+0x5a/0xf0
-         ret_from_intr+0x0/0x30
-         cpuidle_enter+0x17/0x20
-         cpu_startup_entry+0x315/0x3e0
-         rest_init+0x7c/0x80
-    } hitcount:          3  bytes_req:         21  bytes_alloc:         24
-    { stacktrace:
-         __kmalloc_track_caller+0x10b/0x1a0
-         kmemdup+0x20/0x50
-         hidraw_report_event+0x8a/0x120 [hid]
-         hid_report_raw_event+0x3ea/0x440 [hid]
-         hid_input_report+0x112/0x190 [hid]
-         hid_irq_in+0xc2/0x260 [usbhid]
-         __usb_hcd_giveback_urb+0x72/0x120
-         usb_giveback_urb_bh+0x9e/0xe0
-         tasklet_hi_action+0xf8/0x100
-         __do_softirq+0x114/0x2c0
-         irq_exit+0xa5/0xb0
-         do_IRQ+0x5a/0xf0
-         ret_from_intr+0x0/0x30
-    } hitcount:          3  bytes_req:         21  bytes_alloc:         24
-    { stacktrace:
-         kmem_cache_alloc_trace+0xeb/0x150
-         aa_alloc_task_context+0x27/0x40
-         apparmor_cred_prepare+0x1f/0x50
-         security_prepare_creds+0x16/0x20
-         prepare_creds+0xdf/0x1a0
-         SyS_capset+0xb5/0x200
-         system_call_fastpath+0x12/0x6a
-    } hitcount:          1  bytes_req:         32  bytes_alloc:         32
-    .
-    .
-    .
-    { stacktrace:
-         __kmalloc+0x11b/0x1b0
-         i915_gem_execbuffer2+0x6c/0x2c0 [i915]
-         drm_ioctl+0x349/0x670 [drm]
-         do_vfs_ioctl+0x2f0/0x4f0
-         SyS_ioctl+0x81/0xa0
-         system_call_fastpath+0x12/0x6a
-    } hitcount:      17726  bytes_req:   13944120  bytes_alloc:   19593808
-    { stacktrace:
-         __kmalloc+0x11b/0x1b0
-         load_elf_phdrs+0x76/0xa0
-         load_elf_binary+0x102/0x1650
-         search_binary_handler+0x97/0x1d0
-         do_execveat_common.isra.34+0x551/0x6e0
-         SyS_execve+0x3a/0x50
-         return_from_execve+0x0/0x23
-    } hitcount:      33348  bytes_req:   17152128  bytes_alloc:   20226048
-    { stacktrace:
-         kmem_cache_alloc_trace+0xeb/0x150
-         apparmor_file_alloc_security+0x27/0x40
-         security_file_alloc+0x16/0x20
-         get_empty_filp+0x93/0x1c0
-         path_openat+0x31/0x5f0
-         do_filp_open+0x3a/0x90
-         do_sys_open+0x128/0x220
-         SyS_open+0x1e/0x20
-         system_call_fastpath+0x12/0x6a
-    } hitcount:    4766422  bytes_req:    9532844  bytes_alloc:   38131376
-    { stacktrace:
-         __kmalloc+0x11b/0x1b0
-         seq_buf_alloc+0x1b/0x50
-         seq_read+0x2cc/0x370
-         proc_reg_read+0x3d/0x80
-         __vfs_read+0x28/0xe0
-         vfs_read+0x86/0x140
-         SyS_read+0x46/0xb0
-         system_call_fastpath+0x12/0x6a
-    } hitcount:      19133  bytes_req:   78368768  bytes_alloc:   78368768
-
-    Totals:
-        Hits: 6085872
-        Entries: 253
-        Dropped: 0
-
-  If you key a hist trigger on common_pid, in order for example to
-  gather and display sorted totals for each process, you can use the
-  special .execname modifier to display the executable names for the
-  processes in the table rather than raw pids.  The example below
-  keeps a per-process sum of total bytes read::
-
-    # echo 'hist:key=common_pid.execname:val=count:sort=count.descending' > \
-           /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger
-
-    # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/hist
-    # trigger info: hist:keys=common_pid.execname:vals=count:sort=count.descending:size=2048 [active]
-
-    { common_pid: gnome-terminal  [      3196] } hitcount:        280  count:    1093512
-    { common_pid: Xorg            [      1309] } hitcount:        525  count:     256640
-    { common_pid: compiz          [      2889] } hitcount:         59  count:     254400
-    { common_pid: bash            [      8710] } hitcount:          3  count:      66369
-    { common_pid: dbus-daemon-lau [      8703] } hitcount:         49  count:      47739
-    { common_pid: irqbalance      [      1252] } hitcount:         27  count:      27648
-    { common_pid: 01ifupdown      [      8705] } hitcount:          3  count:      17216
-    { common_pid: dbus-daemon     [       772] } hitcount:         10  count:      12396
-    { common_pid: Socket Thread   [      8342] } hitcount:         11  count:      11264
-    { common_pid: nm-dhcp-client. [      8701] } hitcount:          6  count:       7424
-    { common_pid: gmain           [      1315] } hitcount:         18  count:       6336
-    .
-    .
-    .
-    { common_pid: postgres        [      1892] } hitcount:          2  count:         32
-    { common_pid: postgres        [      1891] } hitcount:          2  count:         32
-    { common_pid: gmain           [      8704] } hitcount:          2  count:         32
-    { common_pid: upstart-dbus-br [      2740] } hitcount:         21  count:         21
-    { common_pid: nm-dispatcher.a [      8696] } hitcount:          1  count:         16
-    { common_pid: indicator-datet [      2904] } hitcount:          1  count:         16
-    { common_pid: gdbus           [      2998] } hitcount:          1  count:         16
-    { common_pid: rtkit-daemon    [      2052] } hitcount:          1  count:          8
-    { common_pid: init            [         1] } hitcount:          2  count:          2
-
-    Totals:
-        Hits: 2116
-        Entries: 51
-        Dropped: 0
-
-  Similarly, if you key a hist trigger on syscall id, for example to
-  gather and display a list of systemwide syscall hits, you can use
-  the special .syscall modifier to display the syscall names rather
-  than raw ids.  The example below keeps a running total of syscall
-  counts for the system during the run::
-
-    # echo 'hist:key=id.syscall:val=hitcount' > \
-           /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
-
-    # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
-    # trigger info: hist:keys=id.syscall:vals=hitcount:sort=hitcount:size=2048 [active]
-
-    { id: sys_fsync                     [ 74] } hitcount:          1
-    { id: sys_newuname                  [ 63] } hitcount:          1
-    { id: sys_prctl                     [157] } hitcount:          1
-    { id: sys_statfs                    [137] } hitcount:          1
-    { id: sys_symlink                   [ 88] } hitcount:          1
-    { id: sys_sendmmsg                  [307] } hitcount:          1
-    { id: sys_semctl                    [ 66] } hitcount:          1
-    { id: sys_readlink                  [ 89] } hitcount:          3
-    { id: sys_bind                      [ 49] } hitcount:          3
-    { id: sys_getsockname               [ 51] } hitcount:          3
-    { id: sys_unlink                    [ 87] } hitcount:          3
-    { id: sys_rename                    [ 82] } hitcount:          4
-    { id: unknown_syscall               [ 58] } hitcount:          4
-    { id: sys_connect                   [ 42] } hitcount:          4
-    { id: sys_getpid                    [ 39] } hitcount:          4
-    .
-    .
-    .
-    { id: sys_rt_sigprocmask            [ 14] } hitcount:        952
-    { id: sys_futex                     [202] } hitcount:       1534
-    { id: sys_write                     [  1] } hitcount:       2689
-    { id: sys_setitimer                 [ 38] } hitcount:       2797
-    { id: sys_read                      [  0] } hitcount:       3202
-    { id: sys_select                    [ 23] } hitcount:       3773
-    { id: sys_writev                    [ 20] } hitcount:       4531
-    { id: sys_poll                      [  7] } hitcount:       8314
-    { id: sys_recvmsg                   [ 47] } hitcount:      13738
-    { id: sys_ioctl                     [ 16] } hitcount:      21843
-
-    Totals:
-        Hits: 67612
-        Entries: 72
-        Dropped: 0
-
-  The syscall counts above provide a rough overall picture of system
-  call activity on the system; we can see for example that the most
-  popular system call on this system was the 'sys_ioctl' system call.
-
-  We can use 'compound' keys to refine that number and provide some
-  further insight as to which processes exactly contribute to the
-  overall ioctl count.
-
-  The command below keeps a hitcount for every unique combination of
-  system call id and pid - the end result is essentially a table
-  that keeps a per-pid sum of system call hits.  The results are
-  sorted using the system call id as the primary key, and the
-  hitcount sum as the secondary key::
-
-      # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount' > \
-             /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
-
-      # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
-      # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 [active]
-
-      { id: sys_read                      [  0], common_pid: rtkit-daemon    [      1877] } hitcount:          1
-      { id: sys_read                      [  0], common_pid: gdbus           [      2976] } hitcount:          1
-      { id: sys_read                      [  0], common_pid: console-kit-dae [      3400] } hitcount:          1
-      { id: sys_read                      [  0], common_pid: postgres        [      1865] } hitcount:          1
-      { id: sys_read                      [  0], common_pid: deja-dup-monito [      3543] } hitcount:          2
-      { id: sys_read                      [  0], common_pid: NetworkManager  [       890] } hitcount:          2
-      { id: sys_read                      [  0], common_pid: evolution-calen [      3048] } hitcount:          2
-      { id: sys_read                      [  0], common_pid: postgres        [      1864] } hitcount:          2
-      { id: sys_read                      [  0], common_pid: nm-applet       [      3022] } hitcount:          2
-      { id: sys_read                      [  0], common_pid: whoopsie        [      1212] } hitcount:          2
-      .
-      .
-      .
-      { id: sys_ioctl                     [ 16], common_pid: bash            [      8479] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: bash            [      3472] } hitcount:         12
-      { id: sys_ioctl                     [ 16], common_pid: gnome-terminal  [      3199] } hitcount:         16
-      { id: sys_ioctl                     [ 16], common_pid: Xorg            [      1267] } hitcount:       1808
-      { id: sys_ioctl                     [ 16], common_pid: compiz          [      2994] } hitcount:       5580
-      .
-      .
-      .
-      { id: sys_waitid                    [247], common_pid: upstart-dbus-br [      2690] } hitcount:          3
-      { id: sys_waitid                    [247], common_pid: upstart-dbus-br [      2688] } hitcount:         16
-      { id: sys_inotify_add_watch         [254], common_pid: gmain           [       975] } hitcount:          2
-      { id: sys_inotify_add_watch         [254], common_pid: gmain           [      3204] } hitcount:          4
-      { id: sys_inotify_add_watch         [254], common_pid: gmain           [      2888] } hitcount:          4
-      { id: sys_inotify_add_watch         [254], common_pid: gmain           [      3003] } hitcount:          4
-      { id: sys_inotify_add_watch         [254], common_pid: gmain           [      2873] } hitcount:          4
-      { id: sys_inotify_add_watch         [254], common_pid: gmain           [      3196] } hitcount:          6
-      { id: sys_openat                    [257], common_pid: java            [      2623] } hitcount:          2
-      { id: sys_eventfd2                  [290], common_pid: ibus-ui-gtk3    [      2760] } hitcount:          4
-      { id: sys_eventfd2                  [290], common_pid: compiz          [      2994] } hitcount:          6
-
-      Totals:
-          Hits: 31536
-          Entries: 323
-          Dropped: 0
-
-  The above list does give us a breakdown of the ioctl syscall by
-  pid, but it also gives us quite a bit more than that, which we
-  don't really care about at the moment.  Since we know the syscall
-  id for sys_ioctl (16, displayed next to the sys_ioctl name), we
-  can use that to filter out all the other syscalls::
-
-      # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount if id == 16' > \
-             /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
-
-      # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
-      # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 if id == 16 [active]
-
-      { id: sys_ioctl                     [ 16], common_pid: gmain           [      2769] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: evolution-addre [      8571] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: gmain           [      3003] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: gmain           [      2781] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: gmain           [      2829] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: bash            [      8726] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: bash            [      8508] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: gmain           [      2970] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: gmain           [      2768] } hitcount:          1
-      .
-      .
-      .
-      { id: sys_ioctl                     [ 16], common_pid: pool            [      8559] } hitcount:         45
-      { id: sys_ioctl                     [ 16], common_pid: pool            [      8555] } hitcount:         48
-      { id: sys_ioctl                     [ 16], common_pid: pool            [      8551] } hitcount:         48
-      { id: sys_ioctl                     [ 16], common_pid: avahi-daemon    [       896] } hitcount:         66
-      { id: sys_ioctl                     [ 16], common_pid: Xorg            [      1267] } hitcount:      26674
-      { id: sys_ioctl                     [ 16], common_pid: compiz          [      2994] } hitcount:      73443
-
-      Totals:
-          Hits: 101162
-          Entries: 103
-          Dropped: 0
-
-  The above output shows that 'compiz' and 'Xorg' are far and away
-  the heaviest ioctl callers (which might lead to questions about
-  whether they really need to be making all those calls and to
-  possible avenues for further investigation.)
-
-  The compound key examples used a key and a sum value (hitcount) to
-  sort the output, but we can just as easily use two keys instead.
-  Here's an example where we use a compound key composed of the
-  common_pid and size event fields.  Sorting with pid as the primary
-  key and 'size' as the secondary key allows us to display an
-  ordered summary of the recvfrom sizes, with counts, received by
-  each process::
-
-      # echo 'hist:key=common_pid.execname,size:val=hitcount:sort=common_pid,size' > \
-             /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/trigger
-
-      # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/hist
-      # trigger info: hist:keys=common_pid.execname,size:vals=hitcount:sort=common_pid.execname,size:size=2048 [active]
-
-      { common_pid: smbd            [       784], size:          4 } hitcount:          1
-      { common_pid: dnsmasq         [      1412], size:       4096 } hitcount:        672
-      { common_pid: postgres        [      1796], size:       1000 } hitcount:          6
-      { common_pid: postgres        [      1867], size:       1000 } hitcount:         10
-      { common_pid: bamfdaemon      [      2787], size:         28 } hitcount:          2
-      { common_pid: bamfdaemon      [      2787], size:      14360 } hitcount:          1
-      { common_pid: compiz          [      2994], size:          8 } hitcount:          1
-      { common_pid: compiz          [      2994], size:         20 } hitcount:         11
-      { common_pid: gnome-terminal  [      3199], size:          4 } hitcount:          2
-      { common_pid: firefox         [      8817], size:          4 } hitcount:          1
-      { common_pid: firefox         [      8817], size:          8 } hitcount:          5
-      { common_pid: firefox         [      8817], size:        588 } hitcount:          2
-      { common_pid: firefox         [      8817], size:        628 } hitcount:          1
-      { common_pid: firefox         [      8817], size:       6944 } hitcount:          1
-      { common_pid: firefox         [      8817], size:     408880 } hitcount:          2
-      { common_pid: firefox         [      8822], size:          8 } hitcount:          2
-      { common_pid: firefox         [      8822], size:        160 } hitcount:          2
-      { common_pid: firefox         [      8822], size:        320 } hitcount:          2
-      { common_pid: firefox         [      8822], size:        352 } hitcount:          1
-      .
-      .
-      .
-      { common_pid: pool            [      8923], size:       1960 } hitcount:         10
-      { common_pid: pool            [      8923], size:       2048 } hitcount:         10
-      { common_pid: pool            [      8924], size:       1960 } hitcount:         10
-      { common_pid: pool            [      8924], size:       2048 } hitcount:         10
-      { common_pid: pool            [      8928], size:       1964 } hitcount:          4
-      { common_pid: pool            [      8928], size:       1965 } hitcount:          2
-      { common_pid: pool            [      8928], size:       2048 } hitcount:          6
-      { common_pid: pool            [      8929], size:       1982 } hitcount:          1
-      { common_pid: pool            [      8929], size:       2048 } hitcount:          1
-
-      Totals:
-          Hits: 2016
-          Entries: 224
-          Dropped: 0
-
-  The above example also illustrates the fact that although a compound
-  key is treated as a single entity for hashing purposes, the sub-keys
-  it's composed of can be accessed independently.
-
-  The next example uses a string field as the hash key and
-  demonstrates how you can manually pause and continue a hist trigger.
-  In this example, we'll aggregate fork counts and don't expect a
-  large number of entries in the hash table, so we'll drop it to a
-  much smaller number, say 256::
-
-    # echo 'hist:key=child_comm:val=hitcount:size=256' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
-
-    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
-    # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active]
-
-    { child_comm: dconf worker                        } hitcount:          1
-    { child_comm: ibus-daemon                         } hitcount:          1
-    { child_comm: whoopsie                            } hitcount:          1
-    { child_comm: smbd                                } hitcount:          1
-    { child_comm: gdbus                               } hitcount:          1
-    { child_comm: kthreadd                            } hitcount:          1
-    { child_comm: dconf worker                        } hitcount:          1
-    { child_comm: evolution-alarm                     } hitcount:          2
-    { child_comm: Socket Thread                       } hitcount:          2
-    { child_comm: postgres                            } hitcount:          2
-    { child_comm: bash                                } hitcount:          3
-    { child_comm: compiz                              } hitcount:          3
-    { child_comm: evolution-sourc                     } hitcount:          4
-    { child_comm: dhclient                            } hitcount:          4
-    { child_comm: pool                                } hitcount:          5
-    { child_comm: nm-dispatcher.a                     } hitcount:          8
-    { child_comm: firefox                             } hitcount:          8
-    { child_comm: dbus-daemon                         } hitcount:          8
-    { child_comm: glib-pacrunner                      } hitcount:         10
-    { child_comm: evolution                           } hitcount:         23
-
-    Totals:
-        Hits: 89
-        Entries: 20
-        Dropped: 0
-
-  If we want to pause the hist trigger, we can simply append :pause to
-  the command that started the trigger.  Notice that the trigger info
-  displays as [paused]::
-
-    # echo 'hist:key=child_comm:val=hitcount:size=256:pause' >> \
-           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
-
-    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
-    # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [paused]
-
-    { child_comm: dconf worker                        } hitcount:          1
-    { child_comm: kthreadd                            } hitcount:          1
-    { child_comm: dconf worker                        } hitcount:          1
-    { child_comm: gdbus                               } hitcount:          1
-    { child_comm: ibus-daemon                         } hitcount:          1
-    { child_comm: Socket Thread                       } hitcount:          2
-    { child_comm: evolution-alarm                     } hitcount:          2
-    { child_comm: smbd                                } hitcount:          2
-    { child_comm: bash                                } hitcount:          3
-    { child_comm: whoopsie                            } hitcount:          3
-    { child_comm: compiz                              } hitcount:          3
-    { child_comm: evolution-sourc                     } hitcount:          4
-    { child_comm: pool                                } hitcount:          5
-    { child_comm: postgres                            } hitcount:          6
-    { child_comm: firefox                             } hitcount:          8
-    { child_comm: dhclient                            } hitcount:         10
-    { child_comm: emacs                               } hitcount:         12
-    { child_comm: dbus-daemon                         } hitcount:         20
-    { child_comm: nm-dispatcher.a                     } hitcount:         20
-    { child_comm: evolution                           } hitcount:         35
-    { child_comm: glib-pacrunner                      } hitcount:         59
-
-    Totals:
-        Hits: 199
-        Entries: 21
-        Dropped: 0
-
-  To manually continue having the trigger aggregate events, append
-  :cont instead.  Notice that the trigger info displays as [active]
-  again, and the data has changed::
-
-    # echo 'hist:key=child_comm:val=hitcount:size=256:cont' >> \
-           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
-
-    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
-    # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active]
-
-    { child_comm: dconf worker                        } hitcount:          1
-    { child_comm: dconf worker                        } hitcount:          1
-    { child_comm: kthreadd                            } hitcount:          1
-    { child_comm: gdbus                               } hitcount:          1
-    { child_comm: ibus-daemon                         } hitcount:          1
-    { child_comm: Socket Thread                       } hitcount:          2
-    { child_comm: evolution-alarm                     } hitcount:          2
-    { child_comm: smbd                                } hitcount:          2
-    { child_comm: whoopsie                            } hitcount:          3
-    { child_comm: compiz                              } hitcount:          3
-    { child_comm: evolution-sourc                     } hitcount:          4
-    { child_comm: bash                                } hitcount:          5
-    { child_comm: pool                                } hitcount:          5
-    { child_comm: postgres                            } hitcount:          6
-    { child_comm: firefox                             } hitcount:          8
-    { child_comm: dhclient                            } hitcount:         11
-    { child_comm: emacs                               } hitcount:         12
-    { child_comm: dbus-daemon                         } hitcount:         22
-    { child_comm: nm-dispatcher.a                     } hitcount:         22
-    { child_comm: evolution                           } hitcount:         35
-    { child_comm: glib-pacrunner                      } hitcount:         59
-
-    Totals:
-        Hits: 206
-        Entries: 21
-        Dropped: 0
-
-  The previous example showed how to start and stop a hist trigger by
-  appending 'pause' and 'continue' to the hist trigger command.  A
-  hist trigger can also be started in a paused state by initially
-  starting the trigger with ':pause' appended.  This allows you to
-  start the trigger only when you're ready to start collecting data
-  and not before.  For example, you could start the trigger in a
-  paused state, then unpause it and do something you want to measure,
-  then pause the trigger again when done.
-
-  Of course, doing this manually can be difficult and error-prone, but
-  it is possible to automatically start and stop a hist trigger based
-  on some condition, via the enable_hist and disable_hist triggers.
-
-  For example, suppose we wanted to take a look at the relative
-  weights in terms of skb length for each callpath that leads to a
-  netif_receive_skb event when downloading a decent-sized file using
-  wget.
-
-  First we set up an initially paused stacktrace trigger on the
-  netif_receive_skb event::
-
-    # echo 'hist:key=stacktrace:vals=len:pause' > \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-
-  Next, we set up an 'enable_hist' trigger on the sched_process_exec
-  event, with an 'if filename==/usr/bin/wget' filter.  The effect of
-  this new trigger is that it will 'unpause' the hist trigger we just
-  set up on netif_receive_skb if and only if it sees a
-  sched_process_exec event with a filename of '/usr/bin/wget'.  When
-  that happens, all netif_receive_skb events are aggregated into a
-  hash table keyed on stacktrace::
-
-    # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
-
-  The aggregation continues until the netif_receive_skb is paused
-  again, which is what the following disable_hist event does by
-  creating a similar setup on the sched_process_exit event, using the
-  filter 'comm==wget'::
-
-    # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
-
-  Whenever a process exits and the comm field of the disable_hist
-  trigger filter matches 'comm==wget', the netif_receive_skb hist
-  trigger is disabled.
-
-  The overall effect is that netif_receive_skb events are aggregated
-  into the hash table for only the duration of the wget.  Executing a
-  wget command and then listing the 'hist' file will display the
-  output generated by the wget command::
-
-    $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz
-
-    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
-    # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused]
-
-    { stacktrace:
-         __netif_receive_skb_core+0x46d/0x990
-         __netif_receive_skb+0x18/0x60
-         netif_receive_skb_internal+0x23/0x90
-         napi_gro_receive+0xc8/0x100
-         ieee80211_deliver_skb+0xd6/0x270 [mac80211]
-         ieee80211_rx_handlers+0xccf/0x22f0 [mac80211]
-         ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211]
-         ieee80211_rx+0x31d/0x900 [mac80211]
-         iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm]
-         iwl_rx_dispatch+0x8e/0xf0 [iwldvm]
-         iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi]
-         irq_thread_fn+0x20/0x50
-         irq_thread+0x11f/0x150
-         kthread+0xd2/0xf0
-         ret_from_fork+0x42/0x70
-    } hitcount:         85  len:      28884
-    { stacktrace:
-         __netif_receive_skb_core+0x46d/0x990
-         __netif_receive_skb+0x18/0x60
-         netif_receive_skb_internal+0x23/0x90
-         napi_gro_complete+0xa4/0xe0
-         dev_gro_receive+0x23a/0x360
-         napi_gro_receive+0x30/0x100
-         ieee80211_deliver_skb+0xd6/0x270 [mac80211]
-         ieee80211_rx_handlers+0xccf/0x22f0 [mac80211]
-         ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211]
-         ieee80211_rx+0x31d/0x900 [mac80211]
-         iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm]
-         iwl_rx_dispatch+0x8e/0xf0 [iwldvm]
-         iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi]
-         irq_thread_fn+0x20/0x50
-         irq_thread+0x11f/0x150
-         kthread+0xd2/0xf0
-    } hitcount:         98  len:     664329
-    { stacktrace:
-         __netif_receive_skb_core+0x46d/0x990
-         __netif_receive_skb+0x18/0x60
-         process_backlog+0xa8/0x150
-         net_rx_action+0x15d/0x340
-         __do_softirq+0x114/0x2c0
-         do_softirq_own_stack+0x1c/0x30
-         do_softirq+0x65/0x70
-         __local_bh_enable_ip+0xb5/0xc0
-         ip_finish_output+0x1f4/0x840
-         ip_output+0x6b/0xc0
-         ip_local_out_sk+0x31/0x40
-         ip_send_skb+0x1a/0x50
-         udp_send_skb+0x173/0x2a0
-         udp_sendmsg+0x2bf/0x9f0
-         inet_sendmsg+0x64/0xa0
-         sock_sendmsg+0x3d/0x50
-    } hitcount:        115  len:      13030
-    { stacktrace:
-         __netif_receive_skb_core+0x46d/0x990
-         __netif_receive_skb+0x18/0x60
-         netif_receive_skb_internal+0x23/0x90
-         napi_gro_complete+0xa4/0xe0
-         napi_gro_flush+0x6d/0x90
-         iwl_pcie_irq_handler+0x92a/0x12f0 [iwlwifi]
-         irq_thread_fn+0x20/0x50
-         irq_thread+0x11f/0x150
-         kthread+0xd2/0xf0
-         ret_from_fork+0x42/0x70
-    } hitcount:        934  len:    5512212
-
-    Totals:
-        Hits: 1232
-        Entries: 4
-        Dropped: 0
-
-  The above shows all the netif_receive_skb callpaths and their total
-  lengths for the duration of the wget command.
-
-  The 'clear' hist trigger param can be used to clear the hash table.
-  Suppose we wanted to try another run of the previous example but
-  this time also wanted to see the complete list of events that went
-  into the histogram.  In order to avoid having to set everything up
-  again, we can just clear the histogram first::
-
-    # echo 'hist:key=stacktrace:vals=len:clear' >> \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-
-  Just to verify that it is in fact cleared, here's what we now see in
-  the hist file::
-
-    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
-    # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused]
-
-    Totals:
-        Hits: 0
-        Entries: 0
-        Dropped: 0
-
-  Since we want to see the detailed list of every netif_receive_skb
-  event occurring during the new run, which are in fact the same
-  events being aggregated into the hash table, we add some additional
-  'enable_event' events to the triggering sched_process_exec and
-  sched_process_exit events as such::
-
-    # echo 'enable_event:net:netif_receive_skb if filename==/usr/bin/wget' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
-
-    # echo 'disable_event:net:netif_receive_skb if comm==wget' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
-
-  If you read the trigger files for the sched_process_exec and
-  sched_process_exit triggers, you should see two triggers for each:
-  one enabling/disabling the hist aggregation and the other
-  enabling/disabling the logging of events::
-
-    # cat /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
-    enable_event:net:netif_receive_skb:unlimited if filename==/usr/bin/wget
-    enable_hist:net:netif_receive_skb:unlimited if filename==/usr/bin/wget
-
-    # cat /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
-    enable_event:net:netif_receive_skb:unlimited if comm==wget
-    disable_hist:net:netif_receive_skb:unlimited if comm==wget
-
-  In other words, whenever either of the sched_process_exec or
-  sched_process_exit events is hit and matches 'wget', it enables or
-  disables both the histogram and the event log, and what you end up
-  with is a hash table and set of events just covering the specified
-  duration.  Run the wget command again::
-
-    $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz
-
-  Displaying the 'hist' file should show something similar to what you
-  saw in the last run, but this time you should also see the
-  individual events in the trace file::
-
-    # cat /sys/kernel/debug/tracing/trace
-
-    # tracer: nop
-    #
-    # entries-in-buffer/entries-written: 183/1426   #P:4
-    #
-    #                              _-----=> irqs-off
-    #                             / _----=> need-resched
-    #                            | / _---=> hardirq/softirq
-    #                            || / _--=> preempt-depth
-    #                            ||| /     delay
-    #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
-    #              | |       |   ||||       |         |
-                wget-15108 [000] ..s1 31769.606929: netif_receive_skb: dev=lo skbaddr=ffff88009c353100 len=60
-                wget-15108 [000] ..s1 31769.606999: netif_receive_skb: dev=lo skbaddr=ffff88009c353200 len=60
-             dnsmasq-1382  [000] ..s1 31769.677652: netif_receive_skb: dev=lo skbaddr=ffff88009c352b00 len=130
-             dnsmasq-1382  [000] ..s1 31769.685917: netif_receive_skb: dev=lo skbaddr=ffff88009c352200 len=138
-    ##### CPU 2 buffer started ####
-      irq/29-iwlwifi-559   [002] ..s. 31772.031529: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433d00 len=2948
-      irq/29-iwlwifi-559   [002] ..s. 31772.031572: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432200 len=1500
-      irq/29-iwlwifi-559   [002] ..s. 31772.032196: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433100 len=2948
-      irq/29-iwlwifi-559   [002] ..s. 31772.032761: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433000 len=2948
-      irq/29-iwlwifi-559   [002] ..s. 31772.033220: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432e00 len=1500
-    ....
-
-
-  The following example demonstrates how multiple hist triggers can be
-  attached to a given event.  This capability can be useful for
-  creating a set of different summaries derived from the same set of
-  events, or for comparing the effects of different filters, among
-  other things.
-  ::
-
-    # echo 'hist:keys=skbaddr.hex:vals=len if len < 0' >> \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-    # echo 'hist:keys=skbaddr.hex:vals=len if len > 4096' >> \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-    # echo 'hist:keys=skbaddr.hex:vals=len if len == 256' >> \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-    # echo 'hist:keys=skbaddr.hex:vals=len' >> \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-    # echo 'hist:keys=len:vals=common_preempt_count' >> \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-
-  The above set of commands creates four triggers differing only in
-  their filters, along with a completely different though fairly
-  nonsensical trigger.  Note that in order to append multiple hist
-  triggers to the same file, you should use the '>>' operator to
-  append them ('>' will also add the new hist trigger, but will remove
-  any existing hist triggers beforehand).
-
-  Displaying the contents of the 'hist' file for the event shows the
-  contents of all five histograms::
-
-    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
-
-    # event histogram
-    #
-    # trigger info: hist:keys=len:vals=hitcount,common_preempt_count:sort=hitcount:size=2048 [active]
-    #
-
-    { len:        176 } hitcount:          1  common_preempt_count:          0
-    { len:        223 } hitcount:          1  common_preempt_count:          0
-    { len:       4854 } hitcount:          1  common_preempt_count:          0
-    { len:        395 } hitcount:          1  common_preempt_count:          0
-    { len:        177 } hitcount:          1  common_preempt_count:          0
-    { len:        446 } hitcount:          1  common_preempt_count:          0
-    { len:       1601 } hitcount:          1  common_preempt_count:          0
-    .
-    .
-    .
-    { len:       1280 } hitcount:         66  common_preempt_count:          0
-    { len:        116 } hitcount:         81  common_preempt_count:         40
-    { len:        708 } hitcount:        112  common_preempt_count:          0
-    { len:         46 } hitcount:        221  common_preempt_count:          0
-    { len:       1264 } hitcount:        458  common_preempt_count:          0
-
-    Totals:
-        Hits: 1428
-        Entries: 147
-        Dropped: 0
-
-
-    # event histogram
-    #
-    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
-    #
-
-    { skbaddr: ffff8800baee5e00 } hitcount:          1  len:        130
-    { skbaddr: ffff88005f3d5600 } hitcount:          1  len:       1280
-    { skbaddr: ffff88005f3d4900 } hitcount:          1  len:       1280
-    { skbaddr: ffff88009fed6300 } hitcount:          1  len:        115
-    { skbaddr: ffff88009fe0ad00 } hitcount:          1  len:        115
-    { skbaddr: ffff88008cdb1900 } hitcount:          1  len:         46
-    { skbaddr: ffff880064b5ef00 } hitcount:          1  len:        118
-    { skbaddr: ffff880044e3c700 } hitcount:          1  len:         60
-    { skbaddr: ffff880100065900 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d46bd500 } hitcount:          1  len:        116
-    { skbaddr: ffff88005f3d5f00 } hitcount:          1  len:       1280
-    { skbaddr: ffff880100064700 } hitcount:          1  len:        365
-    { skbaddr: ffff8800badb6f00 } hitcount:          1  len:         60
-    .
-    .
-    .
-    { skbaddr: ffff88009fe0be00 } hitcount:         27  len:      24677
-    { skbaddr: ffff88009fe0a400 } hitcount:         27  len:      23052
-    { skbaddr: ffff88009fe0b700 } hitcount:         31  len:      25589
-    { skbaddr: ffff88009fe0b600 } hitcount:         32  len:      27326
-    { skbaddr: ffff88006a462800 } hitcount:         68  len:      71678
-    { skbaddr: ffff88006a463700 } hitcount:         70  len:      72678
-    { skbaddr: ffff88006a462b00 } hitcount:         71  len:      77589
-    { skbaddr: ffff88006a463600 } hitcount:         73  len:      71307
-    { skbaddr: ffff88006a462200 } hitcount:         81  len:      81032
-
-    Totals:
-        Hits: 1451
-        Entries: 318
-        Dropped: 0
-
-
-    # event histogram
-    #
-    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len == 256 [active]
-    #
-
-
-    Totals:
-        Hits: 0
-        Entries: 0
-        Dropped: 0
-
-
-    # event histogram
-    #
-    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len > 4096 [active]
-    #
-
-    { skbaddr: ffff88009fd2c300 } hitcount:          1  len:       7212
-    { skbaddr: ffff8800d2bcce00 } hitcount:          1  len:       7212
-    { skbaddr: ffff8800d2bcd700 } hitcount:          1  len:       7212
-    { skbaddr: ffff8800d2bcda00 } hitcount:          1  len:      21492
-    { skbaddr: ffff8800ae2e2d00 } hitcount:          1  len:       7212
-    { skbaddr: ffff8800d2bcdb00 } hitcount:          1  len:       7212
-    { skbaddr: ffff88006a4df500 } hitcount:          1  len:       4854
-    { skbaddr: ffff88008ce47b00 } hitcount:          1  len:      18636
-    { skbaddr: ffff8800ae2e2200 } hitcount:          1  len:      12924
-    { skbaddr: ffff88005f3e1000 } hitcount:          1  len:       4356
-    { skbaddr: ffff8800d2bcdc00 } hitcount:          2  len:      24420
-    { skbaddr: ffff8800d2bcc200 } hitcount:          2  len:      12996
-
-    Totals:
-        Hits: 14
-        Entries: 12
-        Dropped: 0
-
-
-    # event histogram
-    #
-    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len < 0 [active]
-    #
-
-
-    Totals:
-        Hits: 0
-        Entries: 0
-        Dropped: 0
-
-  Named triggers can be used to have triggers share a common set of
-  histogram data.  This capability is mostly useful for combining the
-  output of events generated by tracepoints contained inside inline
-  functions, but names can be used in a hist trigger on any event.
-  For example, these two triggers when hit will update the same 'len'
-  field in the shared 'foo' histogram data::
-
-    # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-    # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \
-           /sys/kernel/debug/tracing/events/net/netif_rx/trigger
-
-  You can see that they're updating common histogram data by reading
-  each event's hist files at the same time::
-
-    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist;
-      cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
-
-    # event histogram
-    #
-    # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
-    #
-
-    { skbaddr: ffff88000ad53500 } hitcount:          1  len:         46
-    { skbaddr: ffff8800af5a1500 } hitcount:          1  len:         76
-    { skbaddr: ffff8800d62a1900 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bccb00 } hitcount:          1  len:        468
-    { skbaddr: ffff8800d3c69900 } hitcount:          1  len:         46
-    { skbaddr: ffff88009ff09100 } hitcount:          1  len:         52
-    { skbaddr: ffff88010f13ab00 } hitcount:          1  len:        168
-    { skbaddr: ffff88006a54f400 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bcc500 } hitcount:          1  len:        260
-    { skbaddr: ffff880064505000 } hitcount:          1  len:         46
-    { skbaddr: ffff8800baf24e00 } hitcount:          1  len:         32
-    { skbaddr: ffff88009fe0ad00 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d3edff00 } hitcount:          1  len:         44
-    { skbaddr: ffff88009fe0b400 } hitcount:          1  len:        168
-    { skbaddr: ffff8800a1c55a00 } hitcount:          1  len:         40
-    { skbaddr: ffff8800d2bcd100 } hitcount:          1  len:         40
-    { skbaddr: ffff880064505f00 } hitcount:          1  len:        174
-    { skbaddr: ffff8800a8bff200 } hitcount:          1  len:        160
-    { skbaddr: ffff880044e3cc00 } hitcount:          1  len:         76
-    { skbaddr: ffff8800a8bfe700 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bcdc00 } hitcount:          1  len:         32
-    { skbaddr: ffff8800a1f64800 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bcde00 } hitcount:          1  len:        988
-    { skbaddr: ffff88006a5dea00 } hitcount:          1  len:         46
-    { skbaddr: ffff88002e37a200 } hitcount:          1  len:         44
-    { skbaddr: ffff8800a1f32c00 } hitcount:          2  len:        676
-    { skbaddr: ffff88000ad52600 } hitcount:          2  len:        107
-    { skbaddr: ffff8800a1f91e00 } hitcount:          2  len:         92
-    { skbaddr: ffff8800af5a0200 } hitcount:          2  len:        142
-    { skbaddr: ffff8800d2bcc600 } hitcount:          2  len:        220
-    { skbaddr: ffff8800ba36f500 } hitcount:          2  len:         92
-    { skbaddr: ffff8800d021f800 } hitcount:          2  len:         92
-    { skbaddr: ffff8800a1f33600 } hitcount:          2  len:        675
-    { skbaddr: ffff8800a8bfff00 } hitcount:          3  len:        138
-    { skbaddr: ffff8800d62a1300 } hitcount:          3  len:        138
-    { skbaddr: ffff88002e37a100 } hitcount:          4  len:        184
-    { skbaddr: ffff880064504400 } hitcount:          4  len:        184
-    { skbaddr: ffff8800a8bfec00 } hitcount:          4  len:        184
-    { skbaddr: ffff88000ad53700 } hitcount:          5  len:        230
-    { skbaddr: ffff8800d2bcdb00 } hitcount:          5  len:        196
-    { skbaddr: ffff8800a1f90000 } hitcount:          6  len:        276
-    { skbaddr: ffff88006a54f900 } hitcount:          6  len:        276
-
-    Totals:
-        Hits: 81
-        Entries: 42
-        Dropped: 0
-    # event histogram
-    #
-    # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
-    #
-
-    { skbaddr: ffff88000ad53500 } hitcount:          1  len:         46
-    { skbaddr: ffff8800af5a1500 } hitcount:          1  len:         76
-    { skbaddr: ffff8800d62a1900 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bccb00 } hitcount:          1  len:        468
-    { skbaddr: ffff8800d3c69900 } hitcount:          1  len:         46
-    { skbaddr: ffff88009ff09100 } hitcount:          1  len:         52
-    { skbaddr: ffff88010f13ab00 } hitcount:          1  len:        168
-    { skbaddr: ffff88006a54f400 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bcc500 } hitcount:          1  len:        260
-    { skbaddr: ffff880064505000 } hitcount:          1  len:         46
-    { skbaddr: ffff8800baf24e00 } hitcount:          1  len:         32
-    { skbaddr: ffff88009fe0ad00 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d3edff00 } hitcount:          1  len:         44
-    { skbaddr: ffff88009fe0b400 } hitcount:          1  len:        168
-    { skbaddr: ffff8800a1c55a00 } hitcount:          1  len:         40
-    { skbaddr: ffff8800d2bcd100 } hitcount:          1  len:         40
-    { skbaddr: ffff880064505f00 } hitcount:          1  len:        174
-    { skbaddr: ffff8800a8bff200 } hitcount:          1  len:        160
-    { skbaddr: ffff880044e3cc00 } hitcount:          1  len:         76
-    { skbaddr: ffff8800a8bfe700 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bcdc00 } hitcount:          1  len:         32
-    { skbaddr: ffff8800a1f64800 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bcde00 } hitcount:          1  len:        988
-    { skbaddr: ffff88006a5dea00 } hitcount:          1  len:         46
-    { skbaddr: ffff88002e37a200 } hitcount:          1  len:         44
-    { skbaddr: ffff8800a1f32c00 } hitcount:          2  len:        676
-    { skbaddr: ffff88000ad52600 } hitcount:          2  len:        107
-    { skbaddr: ffff8800a1f91e00 } hitcount:          2  len:         92
-    { skbaddr: ffff8800af5a0200 } hitcount:          2  len:        142
-    { skbaddr: ffff8800d2bcc600 } hitcount:          2  len:        220
-    { skbaddr: ffff8800ba36f500 } hitcount:          2  len:         92
-    { skbaddr: ffff8800d021f800 } hitcount:          2  len:         92
-    { skbaddr: ffff8800a1f33600 } hitcount:          2  len:        675
-    { skbaddr: ffff8800a8bfff00 } hitcount:          3  len:        138
-    { skbaddr: ffff8800d62a1300 } hitcount:          3  len:        138
-    { skbaddr: ffff88002e37a100 } hitcount:          4  len:        184
-    { skbaddr: ffff880064504400 } hitcount:          4  len:        184
-    { skbaddr: ffff8800a8bfec00 } hitcount:          4  len:        184
-    { skbaddr: ffff88000ad53700 } hitcount:          5  len:        230
-    { skbaddr: ffff8800d2bcdb00 } hitcount:          5  len:        196
-    { skbaddr: ffff8800a1f90000 } hitcount:          6  len:        276
-    { skbaddr: ffff88006a54f900 } hitcount:          6  len:        276
-
-    Totals:
-        Hits: 81
-        Entries: 42
-        Dropped: 0
-
-  And here's an example that shows how to combine histogram data from
-  any two events even if they don't share any 'compatible' fields
-  other than 'hitcount' and 'stacktrace'.  These commands create a
-  couple of triggers named 'bar' using those fields::
-
-    # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
-    # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \
-          /sys/kernel/debug/tracing/events/net/netif_rx/trigger
-
-  And displaying the output of either shows some interesting if
-  somewhat confusing output::
-
-    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
-    # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
-
-    # event histogram
-    #
-    # trigger info: hist:name=bar:keys=stacktrace:vals=hitcount:sort=hitcount:size=2048 [active]
-    #
-
-    { stacktrace:
-             _do_fork+0x18e/0x330
-             kernel_thread+0x29/0x30
-             kthreadd+0x154/0x1b0
-             ret_from_fork+0x3f/0x70
-    } hitcount:          1
-    { stacktrace:
-             netif_rx_internal+0xb2/0xd0
-             netif_rx_ni+0x20/0x70
-             dev_loopback_xmit+0xaa/0xd0
-             ip_mc_output+0x126/0x240
-             ip_local_out_sk+0x31/0x40
-             igmp_send_report+0x1e9/0x230
-             igmp_timer_expire+0xe9/0x120
-             call_timer_fn+0x39/0xf0
-             run_timer_softirq+0x1e1/0x290
-             __do_softirq+0xfd/0x290
-             irq_exit+0x98/0xb0
-             smp_apic_timer_interrupt+0x4a/0x60
-             apic_timer_interrupt+0x6d/0x80
-             cpuidle_enter+0x17/0x20
-             call_cpuidle+0x3b/0x60
-             cpu_startup_entry+0x22d/0x310
-    } hitcount:          1
-    { stacktrace:
-             netif_rx_internal+0xb2/0xd0
-             netif_rx_ni+0x20/0x70
-             dev_loopback_xmit+0xaa/0xd0
-             ip_mc_output+0x17f/0x240
-             ip_local_out_sk+0x31/0x40
-             ip_send_skb+0x1a/0x50
-             udp_send_skb+0x13e/0x270
-             udp_sendmsg+0x2bf/0x980
-             inet_sendmsg+0x67/0xa0
-             sock_sendmsg+0x38/0x50
-             SYSC_sendto+0xef/0x170
-             SyS_sendto+0xe/0x10
-             entry_SYSCALL_64_fastpath+0x12/0x6a
-    } hitcount:          2
-    { stacktrace:
-             netif_rx_internal+0xb2/0xd0
-             netif_rx+0x1c/0x60
-             loopback_xmit+0x6c/0xb0
-             dev_hard_start_xmit+0x219/0x3a0
-             __dev_queue_xmit+0x415/0x4f0
-             dev_queue_xmit_sk+0x13/0x20
-             ip_finish_output2+0x237/0x340
-             ip_finish_output+0x113/0x1d0
-             ip_output+0x66/0xc0
-             ip_local_out_sk+0x31/0x40
-             ip_send_skb+0x1a/0x50
-             udp_send_skb+0x16d/0x270
-             udp_sendmsg+0x2bf/0x980
-             inet_sendmsg+0x67/0xa0
-             sock_sendmsg+0x38/0x50
-             ___sys_sendmsg+0x14e/0x270
-    } hitcount:         76
-    { stacktrace:
-             netif_rx_internal+0xb2/0xd0
-             netif_rx+0x1c/0x60
-             loopback_xmit+0x6c/0xb0
-             dev_hard_start_xmit+0x219/0x3a0
-             __dev_queue_xmit+0x415/0x4f0
-             dev_queue_xmit_sk+0x13/0x20
-             ip_finish_output2+0x237/0x340
-             ip_finish_output+0x113/0x1d0
-             ip_output+0x66/0xc0
-             ip_local_out_sk+0x31/0x40
-             ip_send_skb+0x1a/0x50
-             udp_send_skb+0x16d/0x270
-             udp_sendmsg+0x2bf/0x980
-             inet_sendmsg+0x67/0xa0
-             sock_sendmsg+0x38/0x50
-             ___sys_sendmsg+0x269/0x270
-    } hitcount:         77
-    { stacktrace:
-             netif_rx_internal+0xb2/0xd0
-             netif_rx+0x1c/0x60
-             loopback_xmit+0x6c/0xb0
-             dev_hard_start_xmit+0x219/0x3a0
-             __dev_queue_xmit+0x415/0x4f0
-             dev_queue_xmit_sk+0x13/0x20
-             ip_finish_output2+0x237/0x340
-             ip_finish_output+0x113/0x1d0
-             ip_output+0x66/0xc0
-             ip_local_out_sk+0x31/0x40
-             ip_send_skb+0x1a/0x50
-             udp_send_skb+0x16d/0x270
-             udp_sendmsg+0x2bf/0x980
-             inet_sendmsg+0x67/0xa0
-             sock_sendmsg+0x38/0x50
-             SYSC_sendto+0xef/0x170
-    } hitcount:         88
-    { stacktrace:
-             _do_fork+0x18e/0x330
-             SyS_clone+0x19/0x20
-             entry_SYSCALL_64_fastpath+0x12/0x6a
-    } hitcount:        244
-
-    Totals:
-        Hits: 489
-        Entries: 7
-        Dropped: 0
+  See Documentation/trace/histogram.txt for details and examples.
index fdf5fb54a04c0bb1a89fadfd01fdad20fd132878..e45f0786f3f9ef29bb1da2f9c2b3c8d7430adc85 100644 (file)
@@ -543,6 +543,30 @@ of ftrace. Here is a list of some of the key files:
 
        See events.txt for more information.
 
+  timestamp_mode:
+
+       Certain tracers may change the timestamp mode used when
+       logging trace events into the event buffer.  Events with
+       different modes can coexist within a buffer but the mode in
+       effect when an event is logged determines which timestamp mode
+       is used for that event.  The default timestamp mode is
+       'delta'.
+
+       Usual timestamp modes for tracing:
+
+         # cat timestamp_mode
+         [delta] absolute
+
+         The timestamp mode with the square brackets around it is the
+         one in effect.
+
+         delta: Default timestamp mode - timestamp is a delta against
+                a per-buffer timestamp.
+
+         absolute: The timestamp is a full timestamp, not a delta
+                 against some other value.  As such it takes up more
+                 space and is less efficient.
+
   hwlat_detector:
 
        Directory for the Hardware Latency Detector.
diff --git a/Documentation/trace/histogram.txt b/Documentation/trace/histogram.txt
new file mode 100644 (file)
index 0000000..6e05510
--- /dev/null
@@ -0,0 +1,1995 @@
+                            Event Histograms
+
+                   Documentation written by Tom Zanussi
+
+1. Introduction
+===============
+
+  Histogram triggers are special event triggers that can be used to
+  aggregate trace event data into histograms.  For information on
+  trace events and event triggers, see Documentation/trace/events.txt.
+
+
+2. Histogram Trigger Command
+============================
+
+  A histogram trigger command is an event trigger command that
+  aggregates event hits into a hash table keyed on one or more trace
+  event format fields (or stacktrace) and a set of running totals
+  derived from one or more trace event format fields and/or event
+  counts (hitcount).
+
+  The format of a hist trigger is as follows:
+
+        hist:keys=<field1[,field2,...]>[:values=<field1[,field2,...]>]
+          [:sort=<field1[,field2,...]>][:size=#entries][:pause][:continue]
+          [:clear][:name=histname1] [if <filter>]
+
+  When a matching event is hit, an entry is added to a hash table
+  using the key(s) and value(s) named.  Keys and values correspond to
+  fields in the event's format description.  Values must correspond to
+  numeric fields - on an event hit, the value(s) will be added to a
+  sum kept for that field.  The special string 'hitcount' can be used
+  in place of an explicit value field - this is simply a count of
+  event hits.  If 'values' isn't specified, an implicit 'hitcount'
+  value will be automatically created and used as the only value.
+  Keys can be any field, or the special string 'stacktrace', which
+  will use the event's kernel stacktrace as the key.  The keywords
+  'keys' or 'key' can be used to specify keys, and the keywords
+  'values', 'vals', or 'val' can be used to specify values.  Compound
+  keys consisting of up to two fields can be specified by the 'keys'
+  keyword.  Hashing a compound key produces a unique entry in the
+  table for each unique combination of component keys, and can be
+  useful for providing more fine-grained summaries of event data.
+  Additionally, sort keys consisting of up to two fields can be
+  specified by the 'sort' keyword.  If more than one field is
+  specified, the result will be a 'sort within a sort': the first key
+  is taken to be the primary sort key and the second the secondary
+  key.  If a hist trigger is given a name using the 'name' parameter,
+  its histogram data will be shared with other triggers of the same
+  name, and trigger hits will update this common data.  Only triggers
+  with 'compatible' fields can be combined in this way; triggers are
+  'compatible' if the fields named in the trigger share the same
+  number and type of fields and those fields also have the same names.
+  Note that any two events always share the compatible 'hitcount' and
+  'stacktrace' fields and can therefore be combined using those
+  fields, however pointless that may be.
+
+  'hist' triggers add a 'hist' file to each event's subdirectory.
+  Reading the 'hist' file for the event will dump the hash table in
+  its entirety to stdout.  If there are multiple hist triggers
+  attached to an event, there will be a table for each trigger in the
+  output.  The table displayed for a named trigger will be the same as
+  any other instance having the same name. Each printed hash table
+  entry is a simple list of the keys and values comprising the entry;
+  keys are printed first and are delineated by curly braces, and are
+  followed by the set of value fields for the entry.  By default,
+  numeric fields are displayed as base-10 integers.  This can be
+  modified by appending any of the following modifiers to the field
+  name:
+
+        .hex        display a number as a hex value
+       .sym        display an address as a symbol
+       .sym-offset display an address as a symbol and offset
+       .syscall    display a syscall id as a system call name
+       .execname   display a common_pid as a program name
+       .log2       display log2 value rather than raw number
+       .usecs      display a common_timestamp in microseconds
+
+  Note that in general the semantics of a given field aren't
+  interpreted when applying a modifier to it, but there are some
+  restrictions to be aware of in this regard:
+
+    - only the 'hex' modifier can be used for values (because values
+      are essentially sums, and the other modifiers don't make sense
+      in that context).
+    - the 'execname' modifier can only be used on a 'common_pid'.  The
+      reason for this is that the execname is simply the 'comm' value
+      saved for the 'current' process when an event was triggered,
+      which is the same as the common_pid value saved by the event
+      tracing code.  Trying to apply that comm value to other pid
+      values wouldn't be correct, and typically events that care save
+      pid-specific comm fields in the event itself.
+
+  A typical usage scenario would be the following to enable a hist
+  trigger, read its current contents, and then turn it off:
+
+  # echo 'hist:keys=skbaddr.hex:vals=len' > \
+    /sys/kernel/debug/tracing/events/net/netif_rx/trigger
+
+  # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
+
+  # echo '!hist:keys=skbaddr.hex:vals=len' > \
+    /sys/kernel/debug/tracing/events/net/netif_rx/trigger
+
+  The trigger file itself can be read to show the details of the
+  currently attached hist trigger.  This information is also displayed
+  at the top of the 'hist' file when read.
+
+  By default, the size of the hash table is 2048 entries.  The 'size'
+  parameter can be used to specify more or fewer than that.  The units
+  are in terms of hashtable entries - if a run uses more entries than
+  specified, the results will show the number of 'drops', the number
+  of hits that were ignored.  The size should be a power of 2 between
+  128 and 131072 (any non-power-of-2 number specified will be rounded
+  up).
+
+  The 'sort' parameter can be used to specify a value field to sort
+  on.  The default if unspecified is 'hitcount' and the default sort
+  order is 'ascending'.  To sort in the opposite direction, append
+  '.descending' to the sort key.
+
+  The 'pause' parameter can be used to pause an existing hist trigger
+  or to start a hist trigger but not log any events until told to do
+  so.  'continue' or 'cont' can be used to start or restart a paused
+  hist trigger.
+
+  The 'clear' parameter will clear the contents of a running hist
+  trigger and leave its current paused/active state.
+
+  Note that the 'pause', 'cont', and 'clear' parameters should be
+  applied using 'append' shell operator ('>>') if applied to an
+  existing trigger, rather than via the '>' operator, which will cause
+  the trigger to be removed through truncation.
+
+- enable_hist/disable_hist
+
+  The enable_hist and disable_hist triggers can be used to have one
+  event conditionally start and stop another event's already-attached
+  hist trigger.  Any number of enable_hist and disable_hist triggers
+  can be attached to a given event, allowing that event to kick off
+  and stop aggregations on a host of other events.
+
+  The format is very similar to the enable/disable_event triggers:
+
+      enable_hist:<system>:<event>[:count]
+      disable_hist:<system>:<event>[:count]
+
+  Instead of enabling or disabling the tracing of the target event
+  into the trace buffer as the enable/disable_event triggers do, the
+  enable/disable_hist triggers enable or disable the aggregation of
+  the target event into a hash table.
+
+  A typical usage scenario for the enable_hist/disable_hist triggers
+  would be to first set up a paused hist trigger on some event,
+  followed by an enable_hist/disable_hist pair that turns the hist
+  aggregation on and off when conditions of interest are hit:
+
+  # echo 'hist:keys=skbaddr.hex:vals=len:pause' > \
+    /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+
+  # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \
+    /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
+
+  # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \
+    /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
+
+  The above sets up an initially paused hist trigger which is unpaused
+  and starts aggregating events when a given program is executed, and
+  which stops aggregating when the process exits and the hist trigger
+  is paused again.
+
+  The examples below provide a more concrete illustration of the
+  concepts and typical usage patterns discussed above.
+
+  'special' event fields
+  ------------------------
+
+  There are a number of 'special event fields' available for use as
+  keys or values in a hist trigger.  These look like and behave as if
+  they were actual event fields, but aren't really part of the event's
+  field definition or format file.  They are however available for any
+  event, and can be used anywhere an actual event field could be.
+  They are:
+
+    common_timestamp       u64 - timestamp (from ring buffer) associated
+                                 with the event, in nanoseconds.  May be
+                                modified by .usecs to have timestamps
+                                interpreted as microseconds.
+    cpu                    int - the cpu on which the event occurred.
+
+  Extended error information
+  --------------------------
+
+  For some error conditions encountered when invoking a hist trigger
+  command, extended error information is available via the
+  corresponding event's 'hist' file.  Reading the hist file after an
+  error will display more detailed information about what went wrong,
+  if information is available.  This extended error information will
+  be available until the next hist trigger command for that event.
+
+  If available for a given error condition, the extended error
+  information and usage takes the following form:
+
+    # echo xxx > /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger
+    echo: write error: Invalid argument
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/hist
+    ERROR: Couldn't yyy: zzz
+      Last command: xxx
+
+6.2 'hist' trigger examples
+---------------------------
+
+  The first set of examples creates aggregations using the kmalloc
+  event.  The fields that can be used for the hist trigger are listed
+  in the kmalloc event's format file:
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/format
+    name: kmalloc
+    ID: 374
+    format:
+       field:unsigned short common_type;       offset:0;       size:2; signed:0;
+       field:unsigned char common_flags;       offset:2;       size:1; signed:0;
+       field:unsigned char common_preempt_count;               offset:3;       size:1; signed:0;
+       field:int common_pid;                                   offset:4;       size:4; signed:1;
+
+       field:unsigned long call_site;                          offset:8;       size:8; signed:0;
+       field:const void * ptr;                                 offset:16;      size:8; signed:0;
+       field:size_t bytes_req;                                 offset:24;      size:8; signed:0;
+       field:size_t bytes_alloc;                               offset:32;      size:8; signed:0;
+       field:gfp_t gfp_flags;                                  offset:40;      size:4; signed:0;
+
+  We'll start by creating a hist trigger that generates a simple table
+  that lists the total number of bytes requested for each function in
+  the kernel that made one or more calls to kmalloc:
+
+    # echo 'hist:key=call_site:val=bytes_req' > \
+            /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+  This tells the tracing system to create a 'hist' trigger using the
+  call_site field of the kmalloc event as the key for the table, which
+  just means that each unique call_site address will have an entry
+  created for it in the table.  The 'val=bytes_req' parameter tells
+  the hist trigger that for each unique entry (call_site) in the
+  table, it should keep a running total of the number of bytes
+  requested by that call_site.
+
+  We'll let it run for awhile and then dump the contents of the 'hist'
+  file in the kmalloc event's subdirectory (for readability, a number
+  of entries have been omitted):
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active]
+
+    { call_site: 18446744072106379007 } hitcount:          1  bytes_req:        176
+    { call_site: 18446744071579557049 } hitcount:          1  bytes_req:       1024
+    { call_site: 18446744071580608289 } hitcount:          1  bytes_req:      16384
+    { call_site: 18446744071581827654 } hitcount:          1  bytes_req:         24
+    { call_site: 18446744071580700980 } hitcount:          1  bytes_req:          8
+    { call_site: 18446744071579359876 } hitcount:          1  bytes_req:        152
+    { call_site: 18446744071580795365 } hitcount:          3  bytes_req:        144
+    { call_site: 18446744071581303129 } hitcount:          3  bytes_req:        144
+    { call_site: 18446744071580713234 } hitcount:          4  bytes_req:       2560
+    { call_site: 18446744071580933750 } hitcount:          4  bytes_req:        736
+    .
+    .
+    .
+    { call_site: 18446744072106047046 } hitcount:         69  bytes_req:       5576
+    { call_site: 18446744071582116407 } hitcount:         73  bytes_req:       2336
+    { call_site: 18446744072106054684 } hitcount:        136  bytes_req:     140504
+    { call_site: 18446744072106224230 } hitcount:        136  bytes_req:      19584
+    { call_site: 18446744072106078074 } hitcount:        153  bytes_req:       2448
+    { call_site: 18446744072106062406 } hitcount:        153  bytes_req:      36720
+    { call_site: 18446744071582507929 } hitcount:        153  bytes_req:      37088
+    { call_site: 18446744072102520590 } hitcount:        273  bytes_req:      10920
+    { call_site: 18446744071582143559 } hitcount:        358  bytes_req:        716
+    { call_site: 18446744072106465852 } hitcount:        417  bytes_req:      56712
+    { call_site: 18446744072102523378 } hitcount:        485  bytes_req:      27160
+    { call_site: 18446744072099568646 } hitcount:       1676  bytes_req:      33520
+
+    Totals:
+        Hits: 4610
+        Entries: 45
+        Dropped: 0
+
+  The output displays a line for each entry, beginning with the key
+  specified in the trigger, followed by the value(s) also specified in
+  the trigger.  At the beginning of the output is a line that displays
+  the trigger info, which can also be displayed by reading the
+  'trigger' file:
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+    hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active]
+
+  At the end of the output are a few lines that display the overall
+  totals for the run.  The 'Hits' field shows the total number of
+  times the event trigger was hit, the 'Entries' field shows the total
+  number of used entries in the hash table, and the 'Dropped' field
+  shows the number of hits that were dropped because the number of
+  used entries for the run exceeded the maximum number of entries
+  allowed for the table (normally 0, but if not a hint that you may
+  want to increase the size of the table using the 'size' parameter).
+
+  Notice in the above output that there's an extra field, 'hitcount',
+  which wasn't specified in the trigger.  Also notice that in the
+  trigger info output, there's a parameter, 'sort=hitcount', which
+  wasn't specified in the trigger either.  The reason for that is that
+  every trigger implicitly keeps a count of the total number of hits
+  attributed to a given entry, called the 'hitcount'.  That hitcount
+  information is explicitly displayed in the output, and in the
+  absence of a user-specified sort parameter, is used as the default
+  sort field.
+
+  The value 'hitcount' can be used in place of an explicit value in
+  the 'values' parameter if you don't really need to have any
+  particular field summed and are mainly interested in hit
+  frequencies.
+
+  To turn the hist trigger off, simply call up the trigger in the
+  command history and re-execute it with a '!' prepended:
+
+    # echo '!hist:key=call_site:val=bytes_req' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+  Finally, notice that the call_site as displayed in the output above
+  isn't really very useful.  It's an address, but normally addresses
+  are displayed in hex.  To have a numeric field displayed as a hex
+  value, simply append '.hex' to the field name in the trigger:
+
+    # echo 'hist:key=call_site.hex:val=bytes_req' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=call_site.hex:vals=bytes_req:sort=hitcount:size=2048 [active]
+
+    { call_site: ffffffffa026b291 } hitcount:          1  bytes_req:        433
+    { call_site: ffffffffa07186ff } hitcount:          1  bytes_req:        176
+    { call_site: ffffffff811ae721 } hitcount:          1  bytes_req:      16384
+    { call_site: ffffffff811c5134 } hitcount:          1  bytes_req:          8
+    { call_site: ffffffffa04a9ebb } hitcount:          1  bytes_req:        511
+    { call_site: ffffffff8122e0a6 } hitcount:          1  bytes_req:         12
+    { call_site: ffffffff8107da84 } hitcount:          1  bytes_req:        152
+    { call_site: ffffffff812d8246 } hitcount:          1  bytes_req:         24
+    { call_site: ffffffff811dc1e5 } hitcount:          3  bytes_req:        144
+    { call_site: ffffffffa02515e8 } hitcount:          3  bytes_req:        648
+    { call_site: ffffffff81258159 } hitcount:          3  bytes_req:        144
+    { call_site: ffffffff811c80f4 } hitcount:          4  bytes_req:        544
+    .
+    .
+    .
+    { call_site: ffffffffa06c7646 } hitcount:        106  bytes_req:       8024
+    { call_site: ffffffffa06cb246 } hitcount:        132  bytes_req:      31680
+    { call_site: ffffffffa06cef7a } hitcount:        132  bytes_req:       2112
+    { call_site: ffffffff8137e399 } hitcount:        132  bytes_req:      23232
+    { call_site: ffffffffa06c941c } hitcount:        185  bytes_req:     171360
+    { call_site: ffffffffa06f2a66 } hitcount:        185  bytes_req:      26640
+    { call_site: ffffffffa036a70e } hitcount:        265  bytes_req:      10600
+    { call_site: ffffffff81325447 } hitcount:        292  bytes_req:        584
+    { call_site: ffffffffa072da3c } hitcount:        446  bytes_req:      60656
+    { call_site: ffffffffa036b1f2 } hitcount:        526  bytes_req:      29456
+    { call_site: ffffffffa0099c06 } hitcount:       1780  bytes_req:      35600
+
+    Totals:
+        Hits: 4775
+        Entries: 46
+        Dropped: 0
+
+  Even that's only marginally more useful - while hex values do look
+  more like addresses, what users are typically more interested in
+  when looking at text addresses are the corresponding symbols
+  instead.  To have an address displayed as symbolic value instead,
+  simply append '.sym' or '.sym-offset' to the field name in the
+  trigger:
+
+    # echo 'hist:key=call_site.sym:val=bytes_req' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=hitcount:size=2048 [active]
+
+    { call_site: [ffffffff810adcb9] syslog_print_all                              } hitcount:          1  bytes_req:       1024
+    { call_site: [ffffffff8154bc62] usb_control_msg                               } hitcount:          1  bytes_req:          8
+    { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid]                      } hitcount:          1  bytes_req:          7
+    { call_site: [ffffffff8154acbe] usb_alloc_urb                                 } hitcount:          1  bytes_req:        192
+    { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid]                     } hitcount:          1  bytes_req:          7
+    { call_site: [ffffffff811e3a25] __seq_open_private                            } hitcount:          1  bytes_req:         40
+    { call_site: [ffffffff8109524a] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128
+    { call_site: [ffffffff811febd5] fsnotify_alloc_group                          } hitcount:          2  bytes_req:        528
+    { call_site: [ffffffff81440f58] __tty_buffer_request_room                     } hitcount:          2  bytes_req:       2624
+    { call_site: [ffffffff81200ba6] inotify_new_group                             } hitcount:          2  bytes_req:         96
+    { call_site: [ffffffffa05e19af] ieee80211_start_tx_ba_session [mac80211]      } hitcount:          2  bytes_req:        464
+    { call_site: [ffffffff81672406] tcp_get_metrics                               } hitcount:          2  bytes_req:        304
+    { call_site: [ffffffff81097ec2] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128
+    { call_site: [ffffffff81089b05] sched_create_group                            } hitcount:          2  bytes_req:       1424
+    .
+    .
+    .
+    { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:       1185  bytes_req:     123240
+    { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm]                } hitcount:       1185  bytes_req:     104280
+    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:       1402  bytes_req:     190672
+    { call_site: [ffffffff812891ca] ext4_find_extent                              } hitcount:       1518  bytes_req:     146208
+    { call_site: [ffffffffa029070e] drm_vma_node_allow [drm]                      } hitcount:       1746  bytes_req:      69840
+    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       2021  bytes_req:     792312
+    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       2592  bytes_req:     145152
+    { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       2629  bytes_req:     378576
+    { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       2629  bytes_req:    3783248
+    { call_site: [ffffffff81325607] apparmor_file_alloc_security                  } hitcount:       5192  bytes_req:      10384
+    { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       5529  bytes_req:     110584
+    { call_site: [ffffffff8131ebf7] aa_alloc_task_context                         } hitcount:      21943  bytes_req:     702176
+    { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:      55759  bytes_req:    5074265
+
+    Totals:
+        Hits: 109928
+        Entries: 71
+        Dropped: 0
+
+  Because the default sort key above is 'hitcount', the above shows
+  the list of call_sites by increasing hitcount, so that at the bottom
+  we see the functions that made the most kmalloc calls during the
+  run.  If instead we wanted to see the top kmalloc callers in
+  terms of the number of bytes requested rather than the number of
+  calls, and we wanted the top caller to appear at the top, we can use
+  the 'sort' parameter, along with the 'descending' modifier:
+
+    # echo 'hist:key=call_site.sym:val=bytes_req:sort=bytes_req.descending' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=bytes_req.descending:size=2048 [active]
+
+    { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       2186  bytes_req:    3397464
+    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       1790  bytes_req:     712176
+    { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:       8132  bytes_req:     513135
+    { call_site: [ffffffff811e2a1b] seq_buf_alloc                                 } hitcount:        106  bytes_req:     440128
+    { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       2186  bytes_req:     314784
+    { call_site: [ffffffff812891ca] ext4_find_extent                              } hitcount:       2174  bytes_req:     208992
+    { call_site: [ffffffff811ae8e1] __kmalloc                                     } hitcount:          8  bytes_req:     131072
+    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:        859  bytes_req:     116824
+    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       1834  bytes_req:     102704
+    { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:        972  bytes_req:     101088
+    { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm]                } hitcount:        972  bytes_req:      85536
+    { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       3333  bytes_req:      66664
+    { call_site: [ffffffff8137e559] sg_kmalloc                                    } hitcount:        209  bytes_req:      61632
+    .
+    .
+    .
+    { call_site: [ffffffff81095225] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128
+    { call_site: [ffffffff81097ec2] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128
+    { call_site: [ffffffff812d8406] copy_semundo                                  } hitcount:          2  bytes_req:         48
+    { call_site: [ffffffff81200ba6] inotify_new_group                             } hitcount:          1  bytes_req:         48
+    { call_site: [ffffffffa027121a] drm_getmagic [drm]                            } hitcount:          1  bytes_req:         48
+    { call_site: [ffffffff811e3a25] __seq_open_private                            } hitcount:          1  bytes_req:         40
+    { call_site: [ffffffff811c52f4] bprm_change_interp                            } hitcount:          2  bytes_req:         16
+    { call_site: [ffffffff8154bc62] usb_control_msg                               } hitcount:          1  bytes_req:          8
+    { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid]                     } hitcount:          1  bytes_req:          7
+    { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid]                      } hitcount:          1  bytes_req:          7
+
+    Totals:
+        Hits: 32133
+        Entries: 81
+        Dropped: 0
+
+  To display the offset and size information in addition to the symbol
+  name, just use 'sym-offset' instead:
+
+    # echo 'hist:key=call_site.sym-offset:val=bytes_req:sort=bytes_req.descending' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=call_site.sym-offset:vals=bytes_req:sort=bytes_req.descending:size=2048 [active]
+
+    { call_site: [ffffffffa046041c] i915_gem_execbuffer2+0x6c/0x2c0 [i915]                  } hitcount:       4569  bytes_req:    3163720
+    { call_site: [ffffffffa0489a66] intel_ring_begin+0xc6/0x1f0 [i915]                      } hitcount:       4569  bytes_req:     657936
+    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23+0x694/0x1020 [i915]      } hitcount:       1519  bytes_req:     472936
+    { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23+0x516/0x1020 [i915]      } hitcount:       3050  bytes_req:     211832
+    { call_site: [ffffffff811e2a1b] seq_buf_alloc+0x1b/0x50                                 } hitcount:         34  bytes_req:     148384
+    { call_site: [ffffffffa04a580c] intel_crtc_page_flip+0xbc/0x870 [i915]                  } hitcount:       1385  bytes_req:     144040
+    { call_site: [ffffffff811ae8e1] __kmalloc+0x191/0x1b0                                   } hitcount:          8  bytes_req:     131072
+    { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl+0x282/0x360 [drm]              } hitcount:       1385  bytes_req:     121880
+    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc+0x32/0x100 [drm]                  } hitcount:       1848  bytes_req:     103488
+    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state+0x2c/0xa0 [i915]            } hitcount:        461  bytes_req:      62696
+    { call_site: [ffffffffa029070e] drm_vma_node_allow+0x2e/0xd0 [drm]                      } hitcount:       1541  bytes_req:      61640
+    { call_site: [ffffffff815f8d7b] sk_prot_alloc+0xcb/0x1b0                                } hitcount:         57  bytes_req:      57456
+    .
+    .
+    .
+    { call_site: [ffffffff8109524a] alloc_fair_sched_group+0x5a/0x1a0                       } hitcount:          2  bytes_req:        128
+    { call_site: [ffffffffa027b921] drm_vm_open_locked+0x31/0xa0 [drm]                      } hitcount:          3  bytes_req:         96
+    { call_site: [ffffffff8122e266] proc_self_follow_link+0x76/0xb0                         } hitcount:          8  bytes_req:         96
+    { call_site: [ffffffff81213e80] load_elf_binary+0x240/0x1650                            } hitcount:          3  bytes_req:         84
+    { call_site: [ffffffff8154bc62] usb_control_msg+0x42/0x110                              } hitcount:          1  bytes_req:          8
+    { call_site: [ffffffffa00bf6fe] hidraw_send_report+0x7e/0x1a0 [hid]                     } hitcount:          1  bytes_req:          7
+    { call_site: [ffffffffa00bf1ca] hidraw_report_event+0x8a/0x120 [hid]                    } hitcount:          1  bytes_req:          7
+
+    Totals:
+        Hits: 26098
+        Entries: 64
+        Dropped: 0
+
+  We can also add multiple fields to the 'values' parameter.  For
+  example, we might want to see the total number of bytes allocated
+  alongside bytes requested, and display the result sorted by bytes
+  allocated in a descending order:
+
+    # echo 'hist:keys=call_site.sym:values=bytes_req,bytes_alloc:sort=bytes_alloc.descending' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=call_site.sym:vals=bytes_req,bytes_alloc:sort=bytes_alloc.descending:size=2048 [active]
+
+    { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       7403  bytes_req:    4084360  bytes_alloc:    5958016
+    { call_site: [ffffffff811e2a1b] seq_buf_alloc                                 } hitcount:        541  bytes_req:    2213968  bytes_alloc:    2228224
+    { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       7404  bytes_req:    1066176  bytes_alloc:    1421568
+    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       1565  bytes_req:     557368  bytes_alloc:    1037760
+    { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:       9557  bytes_req:     595778  bytes_alloc:     695744
+    { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       5839  bytes_req:     430680  bytes_alloc:     470400
+    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:       2388  bytes_req:     324768  bytes_alloc:     458496
+    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       3911  bytes_req:     219016  bytes_alloc:     250304
+    { call_site: [ffffffff815f8d7b] sk_prot_alloc                                 } hitcount:        235  bytes_req:     236880  bytes_alloc:     240640
+    { call_site: [ffffffff8137e559] sg_kmalloc                                    } hitcount:        557  bytes_req:     169024  bytes_alloc:     221760
+    { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       9378  bytes_req:     187548  bytes_alloc:     206312
+    { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:       1519  bytes_req:     157976  bytes_alloc:     194432
+    .
+    .
+    .
+    { call_site: [ffffffff8109bd3b] sched_autogroup_create_attach                 } hitcount:          2  bytes_req:        144  bytes_alloc:        192
+    { call_site: [ffffffff81097ee8] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128  bytes_alloc:        128
+    { call_site: [ffffffff8109524a] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128  bytes_alloc:        128
+    { call_site: [ffffffff81095225] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128  bytes_alloc:        128
+    { call_site: [ffffffff81097ec2] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128  bytes_alloc:        128
+    { call_site: [ffffffff81213e80] load_elf_binary                               } hitcount:          3  bytes_req:         84  bytes_alloc:         96
+    { call_site: [ffffffff81079a2e] kthread_create_on_node                        } hitcount:          1  bytes_req:         56  bytes_alloc:         64
+    { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid]                      } hitcount:          1  bytes_req:          7  bytes_alloc:          8
+    { call_site: [ffffffff8154bc62] usb_control_msg                               } hitcount:          1  bytes_req:          8  bytes_alloc:          8
+    { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid]                     } hitcount:          1  bytes_req:          7  bytes_alloc:          8
+
+    Totals:
+        Hits: 66598
+        Entries: 65
+        Dropped: 0
+
+  Finally, to finish off our kmalloc example, instead of simply having
+  the hist trigger display symbolic call_sites, we can have the hist
+  trigger additionally display the complete set of kernel stack traces
+  that led to each call_site.  To do that, we simply use the special
+  value 'stacktrace' for the key parameter:
+
+    # echo 'hist:keys=stacktrace:values=bytes_req,bytes_alloc:sort=bytes_alloc' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+  The above trigger will use the kernel stack trace in effect when an
+  event is triggered as the key for the hash table.  This allows the
+  enumeration of every kernel callpath that led up to a particular
+  event, along with a running total of any of the event fields for
+  that event.  Here we tally bytes requested and bytes allocated for
+  every callpath in the system that led up to a kmalloc (in this case
+  every callpath to a kmalloc for a kernel compile):
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=stacktrace:vals=bytes_req,bytes_alloc:sort=bytes_alloc:size=2048 [active]
+
+    { stacktrace:
+         __kmalloc_track_caller+0x10b/0x1a0
+         kmemdup+0x20/0x50
+         hidraw_report_event+0x8a/0x120 [hid]
+         hid_report_raw_event+0x3ea/0x440 [hid]
+         hid_input_report+0x112/0x190 [hid]
+         hid_irq_in+0xc2/0x260 [usbhid]
+         __usb_hcd_giveback_urb+0x72/0x120
+         usb_giveback_urb_bh+0x9e/0xe0
+         tasklet_hi_action+0xf8/0x100
+         __do_softirq+0x114/0x2c0
+         irq_exit+0xa5/0xb0
+         do_IRQ+0x5a/0xf0
+         ret_from_intr+0x0/0x30
+         cpuidle_enter+0x17/0x20
+         cpu_startup_entry+0x315/0x3e0
+         rest_init+0x7c/0x80
+    } hitcount:          3  bytes_req:         21  bytes_alloc:         24
+    { stacktrace:
+         __kmalloc_track_caller+0x10b/0x1a0
+         kmemdup+0x20/0x50
+         hidraw_report_event+0x8a/0x120 [hid]
+         hid_report_raw_event+0x3ea/0x440 [hid]
+         hid_input_report+0x112/0x190 [hid]
+         hid_irq_in+0xc2/0x260 [usbhid]
+         __usb_hcd_giveback_urb+0x72/0x120
+         usb_giveback_urb_bh+0x9e/0xe0
+         tasklet_hi_action+0xf8/0x100
+         __do_softirq+0x114/0x2c0
+         irq_exit+0xa5/0xb0
+         do_IRQ+0x5a/0xf0
+         ret_from_intr+0x0/0x30
+    } hitcount:          3  bytes_req:         21  bytes_alloc:         24
+    { stacktrace:
+         kmem_cache_alloc_trace+0xeb/0x150
+         aa_alloc_task_context+0x27/0x40
+         apparmor_cred_prepare+0x1f/0x50
+         security_prepare_creds+0x16/0x20
+         prepare_creds+0xdf/0x1a0
+         SyS_capset+0xb5/0x200
+         system_call_fastpath+0x12/0x6a
+    } hitcount:          1  bytes_req:         32  bytes_alloc:         32
+    .
+    .
+    .
+    { stacktrace:
+         __kmalloc+0x11b/0x1b0
+         i915_gem_execbuffer2+0x6c/0x2c0 [i915]
+         drm_ioctl+0x349/0x670 [drm]
+         do_vfs_ioctl+0x2f0/0x4f0
+         SyS_ioctl+0x81/0xa0
+         system_call_fastpath+0x12/0x6a
+    } hitcount:      17726  bytes_req:   13944120  bytes_alloc:   19593808
+    { stacktrace:
+         __kmalloc+0x11b/0x1b0
+         load_elf_phdrs+0x76/0xa0
+         load_elf_binary+0x102/0x1650
+         search_binary_handler+0x97/0x1d0
+         do_execveat_common.isra.34+0x551/0x6e0
+         SyS_execve+0x3a/0x50
+         return_from_execve+0x0/0x23
+    } hitcount:      33348  bytes_req:   17152128  bytes_alloc:   20226048
+    { stacktrace:
+         kmem_cache_alloc_trace+0xeb/0x150
+         apparmor_file_alloc_security+0x27/0x40
+         security_file_alloc+0x16/0x20
+         get_empty_filp+0x93/0x1c0
+         path_openat+0x31/0x5f0
+         do_filp_open+0x3a/0x90
+         do_sys_open+0x128/0x220
+         SyS_open+0x1e/0x20
+         system_call_fastpath+0x12/0x6a
+    } hitcount:    4766422  bytes_req:    9532844  bytes_alloc:   38131376
+    { stacktrace:
+         __kmalloc+0x11b/0x1b0
+         seq_buf_alloc+0x1b/0x50
+         seq_read+0x2cc/0x370
+         proc_reg_read+0x3d/0x80
+         __vfs_read+0x28/0xe0
+         vfs_read+0x86/0x140
+         SyS_read+0x46/0xb0
+         system_call_fastpath+0x12/0x6a
+    } hitcount:      19133  bytes_req:   78368768  bytes_alloc:   78368768
+
+    Totals:
+        Hits: 6085872
+        Entries: 253
+        Dropped: 0
+
+  If you key a hist trigger on common_pid, in order for example to
+  gather and display sorted totals for each process, you can use the
+  special .execname modifier to display the executable names for the
+  processes in the table rather than raw pids.  The example below
+  keeps a per-process sum of total bytes read:
+
+    # echo 'hist:key=common_pid.execname:val=count:sort=count.descending' > \
+           /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger
+
+    # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/hist
+    # trigger info: hist:keys=common_pid.execname:vals=count:sort=count.descending:size=2048 [active]
+
+    { common_pid: gnome-terminal  [      3196] } hitcount:        280  count:    1093512
+    { common_pid: Xorg            [      1309] } hitcount:        525  count:     256640
+    { common_pid: compiz          [      2889] } hitcount:         59  count:     254400
+    { common_pid: bash            [      8710] } hitcount:          3  count:      66369
+    { common_pid: dbus-daemon-lau [      8703] } hitcount:         49  count:      47739
+    { common_pid: irqbalance      [      1252] } hitcount:         27  count:      27648
+    { common_pid: 01ifupdown      [      8705] } hitcount:          3  count:      17216
+    { common_pid: dbus-daemon     [       772] } hitcount:         10  count:      12396
+    { common_pid: Socket Thread   [      8342] } hitcount:         11  count:      11264
+    { common_pid: nm-dhcp-client. [      8701] } hitcount:          6  count:       7424
+    { common_pid: gmain           [      1315] } hitcount:         18  count:       6336
+    .
+    .
+    .
+    { common_pid: postgres        [      1892] } hitcount:          2  count:         32
+    { common_pid: postgres        [      1891] } hitcount:          2  count:         32
+    { common_pid: gmain           [      8704] } hitcount:          2  count:         32
+    { common_pid: upstart-dbus-br [      2740] } hitcount:         21  count:         21
+    { common_pid: nm-dispatcher.a [      8696] } hitcount:          1  count:         16
+    { common_pid: indicator-datet [      2904] } hitcount:          1  count:         16
+    { common_pid: gdbus           [      2998] } hitcount:          1  count:         16
+    { common_pid: rtkit-daemon    [      2052] } hitcount:          1  count:          8
+    { common_pid: init            [         1] } hitcount:          2  count:          2
+
+    Totals:
+        Hits: 2116
+        Entries: 51
+        Dropped: 0
+
+  Similarly, if you key a hist trigger on syscall id, for example to
+  gather and display a list of systemwide syscall hits, you can use
+  the special .syscall modifier to display the syscall names rather
+  than raw ids.  The example below keeps a running total of syscall
+  counts for the system during the run:
+
+    # echo 'hist:key=id.syscall:val=hitcount' > \
+           /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
+
+    # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
+    # trigger info: hist:keys=id.syscall:vals=hitcount:sort=hitcount:size=2048 [active]
+
+    { id: sys_fsync                     [ 74] } hitcount:          1
+    { id: sys_newuname                  [ 63] } hitcount:          1
+    { id: sys_prctl                     [157] } hitcount:          1
+    { id: sys_statfs                    [137] } hitcount:          1
+    { id: sys_symlink                   [ 88] } hitcount:          1
+    { id: sys_sendmmsg                  [307] } hitcount:          1
+    { id: sys_semctl                    [ 66] } hitcount:          1
+    { id: sys_readlink                  [ 89] } hitcount:          3
+    { id: sys_bind                      [ 49] } hitcount:          3
+    { id: sys_getsockname               [ 51] } hitcount:          3
+    { id: sys_unlink                    [ 87] } hitcount:          3
+    { id: sys_rename                    [ 82] } hitcount:          4
+    { id: unknown_syscall               [ 58] } hitcount:          4
+    { id: sys_connect                   [ 42] } hitcount:          4
+    { id: sys_getpid                    [ 39] } hitcount:          4
+    .
+    .
+    .
+    { id: sys_rt_sigprocmask            [ 14] } hitcount:        952
+    { id: sys_futex                     [202] } hitcount:       1534
+    { id: sys_write                     [  1] } hitcount:       2689
+    { id: sys_setitimer                 [ 38] } hitcount:       2797
+    { id: sys_read                      [  0] } hitcount:       3202
+    { id: sys_select                    [ 23] } hitcount:       3773
+    { id: sys_writev                    [ 20] } hitcount:       4531
+    { id: sys_poll                      [  7] } hitcount:       8314
+    { id: sys_recvmsg                   [ 47] } hitcount:      13738
+    { id: sys_ioctl                     [ 16] } hitcount:      21843
+
+    Totals:
+        Hits: 67612
+        Entries: 72
+        Dropped: 0
+
+    The syscall counts above provide a rough overall picture of system
+    call activity on the system; we can see for example that the most
+    popular system call on this system was the 'sys_ioctl' system call.
+
+    We can use 'compound' keys to refine that number and provide some
+    further insight as to which processes exactly contribute to the
+    overall ioctl count.
+
+    The command below keeps a hitcount for every unique combination of
+    system call id and pid - the end result is essentially a table
+    that keeps a per-pid sum of system call hits.  The results are
+    sorted using the system call id as the primary key, and the
+    hitcount sum as the secondary key:
+
+    # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount' > \
+           /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
+
+    # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
+    # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 [active]
+
+    { id: sys_read                      [  0], common_pid: rtkit-daemon    [      1877] } hitcount:          1
+    { id: sys_read                      [  0], common_pid: gdbus           [      2976] } hitcount:          1
+    { id: sys_read                      [  0], common_pid: console-kit-dae [      3400] } hitcount:          1
+    { id: sys_read                      [  0], common_pid: postgres        [      1865] } hitcount:          1
+    { id: sys_read                      [  0], common_pid: deja-dup-monito [      3543] } hitcount:          2
+    { id: sys_read                      [  0], common_pid: NetworkManager  [       890] } hitcount:          2
+    { id: sys_read                      [  0], common_pid: evolution-calen [      3048] } hitcount:          2
+    { id: sys_read                      [  0], common_pid: postgres        [      1864] } hitcount:          2
+    { id: sys_read                      [  0], common_pid: nm-applet       [      3022] } hitcount:          2
+    { id: sys_read                      [  0], common_pid: whoopsie        [      1212] } hitcount:          2
+    .
+    .
+    .
+    { id: sys_ioctl                     [ 16], common_pid: bash            [      8479] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: bash            [      3472] } hitcount:         12
+    { id: sys_ioctl                     [ 16], common_pid: gnome-terminal  [      3199] } hitcount:         16
+    { id: sys_ioctl                     [ 16], common_pid: Xorg            [      1267] } hitcount:       1808
+    { id: sys_ioctl                     [ 16], common_pid: compiz          [      2994] } hitcount:       5580
+    .
+    .
+    .
+    { id: sys_waitid                    [247], common_pid: upstart-dbus-br [      2690] } hitcount:          3
+    { id: sys_waitid                    [247], common_pid: upstart-dbus-br [      2688] } hitcount:         16
+    { id: sys_inotify_add_watch         [254], common_pid: gmain           [       975] } hitcount:          2
+    { id: sys_inotify_add_watch         [254], common_pid: gmain           [      3204] } hitcount:          4
+    { id: sys_inotify_add_watch         [254], common_pid: gmain           [      2888] } hitcount:          4
+    { id: sys_inotify_add_watch         [254], common_pid: gmain           [      3003] } hitcount:          4
+    { id: sys_inotify_add_watch         [254], common_pid: gmain           [      2873] } hitcount:          4
+    { id: sys_inotify_add_watch         [254], common_pid: gmain           [      3196] } hitcount:          6
+    { id: sys_openat                    [257], common_pid: java            [      2623] } hitcount:          2
+    { id: sys_eventfd2                  [290], common_pid: ibus-ui-gtk3    [      2760] } hitcount:          4
+    { id: sys_eventfd2                  [290], common_pid: compiz          [      2994] } hitcount:          6
+
+    Totals:
+        Hits: 31536
+        Entries: 323
+        Dropped: 0
+
+    The above list does give us a breakdown of the ioctl syscall by
+    pid, but it also gives us quite a bit more than that, which we
+    don't really care about at the moment.  Since we know the syscall
+    id for sys_ioctl (16, displayed next to the sys_ioctl name), we
+    can use that to filter out all the other syscalls:
+
+    # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount if id == 16' > \
+           /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
+
+    # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
+    # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 if id == 16 [active]
+
+    { id: sys_ioctl                     [ 16], common_pid: gmain           [      2769] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: evolution-addre [      8571] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: gmain           [      3003] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: gmain           [      2781] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: gmain           [      2829] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: bash            [      8726] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: bash            [      8508] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: gmain           [      2970] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: gmain           [      2768] } hitcount:          1
+    .
+    .
+    .
+    { id: sys_ioctl                     [ 16], common_pid: pool            [      8559] } hitcount:         45
+    { id: sys_ioctl                     [ 16], common_pid: pool            [      8555] } hitcount:         48
+    { id: sys_ioctl                     [ 16], common_pid: pool            [      8551] } hitcount:         48
+    { id: sys_ioctl                     [ 16], common_pid: avahi-daemon    [       896] } hitcount:         66
+    { id: sys_ioctl                     [ 16], common_pid: Xorg            [      1267] } hitcount:      26674
+    { id: sys_ioctl                     [ 16], common_pid: compiz          [      2994] } hitcount:      73443
+
+    Totals:
+        Hits: 101162
+        Entries: 103
+        Dropped: 0
+
+    The above output shows that 'compiz' and 'Xorg' are far and away
+    the heaviest ioctl callers (which might lead to questions about
+    whether they really need to be making all those calls and to
+    possible avenues for further investigation.)
+
+    The compound key examples used a key and a sum value (hitcount) to
+    sort the output, but we can just as easily use two keys instead.
+    Here's an example where we use a compound key composed of the
+    common_pid and size event fields.  Sorting with pid as the primary
+    key and 'size' as the secondary key allows us to display an
+    ordered summary of the recvfrom sizes, with counts, received by
+    each process:
+
+    # echo 'hist:key=common_pid.execname,size:val=hitcount:sort=common_pid,size' > \
+           /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/trigger
+
+    # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/hist
+    # trigger info: hist:keys=common_pid.execname,size:vals=hitcount:sort=common_pid.execname,size:size=2048 [active]
+
+    { common_pid: smbd            [       784], size:          4 } hitcount:          1
+    { common_pid: dnsmasq         [      1412], size:       4096 } hitcount:        672
+    { common_pid: postgres        [      1796], size:       1000 } hitcount:          6
+    { common_pid: postgres        [      1867], size:       1000 } hitcount:         10
+    { common_pid: bamfdaemon      [      2787], size:         28 } hitcount:          2
+    { common_pid: bamfdaemon      [      2787], size:      14360 } hitcount:          1
+    { common_pid: compiz          [      2994], size:          8 } hitcount:          1
+    { common_pid: compiz          [      2994], size:         20 } hitcount:         11
+    { common_pid: gnome-terminal  [      3199], size:          4 } hitcount:          2
+    { common_pid: firefox         [      8817], size:          4 } hitcount:          1
+    { common_pid: firefox         [      8817], size:          8 } hitcount:          5
+    { common_pid: firefox         [      8817], size:        588 } hitcount:          2
+    { common_pid: firefox         [      8817], size:        628 } hitcount:          1
+    { common_pid: firefox         [      8817], size:       6944 } hitcount:          1
+    { common_pid: firefox         [      8817], size:     408880 } hitcount:          2
+    { common_pid: firefox         [      8822], size:          8 } hitcount:          2
+    { common_pid: firefox         [      8822], size:        160 } hitcount:          2
+    { common_pid: firefox         [      8822], size:        320 } hitcount:          2
+    { common_pid: firefox         [      8822], size:        352 } hitcount:          1
+    .
+    .
+    .
+    { common_pid: pool            [      8923], size:       1960 } hitcount:         10
+    { common_pid: pool            [      8923], size:       2048 } hitcount:         10
+    { common_pid: pool            [      8924], size:       1960 } hitcount:         10
+    { common_pid: pool            [      8924], size:       2048 } hitcount:         10
+    { common_pid: pool            [      8928], size:       1964 } hitcount:          4
+    { common_pid: pool            [      8928], size:       1965 } hitcount:          2
+    { common_pid: pool            [      8928], size:       2048 } hitcount:          6
+    { common_pid: pool            [      8929], size:       1982 } hitcount:          1
+    { common_pid: pool            [      8929], size:       2048 } hitcount:          1
+
+    Totals:
+        Hits: 2016
+        Entries: 224
+        Dropped: 0
+
+  The above example also illustrates the fact that although a compound
+  key is treated as a single entity for hashing purposes, the sub-keys
+  it's composed of can be accessed independently.
+
+  The next example uses a string field as the hash key and
+  demonstrates how you can manually pause and continue a hist trigger.
+  In this example, we'll aggregate fork counts and don't expect a
+  large number of entries in the hash table, so we'll drop it to a
+  much smaller number, say 256:
+
+    # echo 'hist:key=child_comm:val=hitcount:size=256' > \
+           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
+    # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active]
+
+    { child_comm: dconf worker                        } hitcount:          1
+    { child_comm: ibus-daemon                         } hitcount:          1
+    { child_comm: whoopsie                            } hitcount:          1
+    { child_comm: smbd                                } hitcount:          1
+    { child_comm: gdbus                               } hitcount:          1
+    { child_comm: kthreadd                            } hitcount:          1
+    { child_comm: dconf worker                        } hitcount:          1
+    { child_comm: evolution-alarm                     } hitcount:          2
+    { child_comm: Socket Thread                       } hitcount:          2
+    { child_comm: postgres                            } hitcount:          2
+    { child_comm: bash                                } hitcount:          3
+    { child_comm: compiz                              } hitcount:          3
+    { child_comm: evolution-sourc                     } hitcount:          4
+    { child_comm: dhclient                            } hitcount:          4
+    { child_comm: pool                                } hitcount:          5
+    { child_comm: nm-dispatcher.a                     } hitcount:          8
+    { child_comm: firefox                             } hitcount:          8
+    { child_comm: dbus-daemon                         } hitcount:          8
+    { child_comm: glib-pacrunner                      } hitcount:         10
+    { child_comm: evolution                           } hitcount:         23
+
+    Totals:
+        Hits: 89
+        Entries: 20
+        Dropped: 0
+
+  If we want to pause the hist trigger, we can simply append :pause to
+  the command that started the trigger.  Notice that the trigger info
+  displays as [paused]:
+
+    # echo 'hist:key=child_comm:val=hitcount:size=256:pause' >> \
+           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
+    # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [paused]
+
+    { child_comm: dconf worker                        } hitcount:          1
+    { child_comm: kthreadd                            } hitcount:          1
+    { child_comm: dconf worker                        } hitcount:          1
+    { child_comm: gdbus                               } hitcount:          1
+    { child_comm: ibus-daemon                         } hitcount:          1
+    { child_comm: Socket Thread                       } hitcount:          2
+    { child_comm: evolution-alarm                     } hitcount:          2
+    { child_comm: smbd                                } hitcount:          2
+    { child_comm: bash                                } hitcount:          3
+    { child_comm: whoopsie                            } hitcount:          3
+    { child_comm: compiz                              } hitcount:          3
+    { child_comm: evolution-sourc                     } hitcount:          4
+    { child_comm: pool                                } hitcount:          5
+    { child_comm: postgres                            } hitcount:          6
+    { child_comm: firefox                             } hitcount:          8
+    { child_comm: dhclient                            } hitcount:         10
+    { child_comm: emacs                               } hitcount:         12
+    { child_comm: dbus-daemon                         } hitcount:         20
+    { child_comm: nm-dispatcher.a                     } hitcount:         20
+    { child_comm: evolution                           } hitcount:         35
+    { child_comm: glib-pacrunner                      } hitcount:         59
+
+    Totals:
+        Hits: 199
+        Entries: 21
+        Dropped: 0
+
+  To manually continue having the trigger aggregate events, append
+  :cont instead.  Notice that the trigger info displays as [active]
+  again, and the data has changed:
+
+    # echo 'hist:key=child_comm:val=hitcount:size=256:cont' >> \
+           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
+    # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active]
+
+    { child_comm: dconf worker                        } hitcount:          1
+    { child_comm: dconf worker                        } hitcount:          1
+    { child_comm: kthreadd                            } hitcount:          1
+    { child_comm: gdbus                               } hitcount:          1
+    { child_comm: ibus-daemon                         } hitcount:          1
+    { child_comm: Socket Thread                       } hitcount:          2
+    { child_comm: evolution-alarm                     } hitcount:          2
+    { child_comm: smbd                                } hitcount:          2
+    { child_comm: whoopsie                            } hitcount:          3
+    { child_comm: compiz                              } hitcount:          3
+    { child_comm: evolution-sourc                     } hitcount:          4
+    { child_comm: bash                                } hitcount:          5
+    { child_comm: pool                                } hitcount:          5
+    { child_comm: postgres                            } hitcount:          6
+    { child_comm: firefox                             } hitcount:          8
+    { child_comm: dhclient                            } hitcount:         11
+    { child_comm: emacs                               } hitcount:         12
+    { child_comm: dbus-daemon                         } hitcount:         22
+    { child_comm: nm-dispatcher.a                     } hitcount:         22
+    { child_comm: evolution                           } hitcount:         35
+    { child_comm: glib-pacrunner                      } hitcount:         59
+
+    Totals:
+        Hits: 206
+        Entries: 21
+        Dropped: 0
+
+  The previous example showed how to start and stop a hist trigger by
+  appending 'pause' and 'continue' to the hist trigger command.  A
+  hist trigger can also be started in a paused state by initially
+  starting the trigger with ':pause' appended.  This allows you to
+  start the trigger only when you're ready to start collecting data
+  and not before.  For example, you could start the trigger in a
+  paused state, then unpause it and do something you want to measure,
+  then pause the trigger again when done.
+
+  Of course, doing this manually can be difficult and error-prone, but
+  it is possible to automatically start and stop a hist trigger based
+  on some condition, via the enable_hist and disable_hist triggers.
+
+  For example, suppose we wanted to take a look at the relative
+  weights in terms of skb length for each callpath that leads to a
+  netif_receive_skb event when downloading a decent-sized file using
+  wget.
+
+  First we set up an initially paused stacktrace trigger on the
+  netif_receive_skb event:
+
+    # echo 'hist:key=stacktrace:vals=len:pause' > \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+
+  Next, we set up an 'enable_hist' trigger on the sched_process_exec
+  event, with an 'if filename==/usr/bin/wget' filter.  The effect of
+  this new trigger is that it will 'unpause' the hist trigger we just
+  set up on netif_receive_skb if and only if it sees a
+  sched_process_exec event with a filename of '/usr/bin/wget'.  When
+  that happens, all netif_receive_skb events are aggregated into a
+  hash table keyed on stacktrace:
+
+    # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \
+           /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
+
+  The aggregation continues until the netif_receive_skb hist trigger is paused
+  again, which is what the following disable_hist event does by
+  creating a similar setup on the sched_process_exit event, using the
+  filter 'comm==wget':
+
+    # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \
+           /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
+
+  Whenever a process exits and the comm field of the disable_hist
+  trigger filter matches 'comm==wget', the netif_receive_skb hist
+  trigger is disabled.
+
+  The overall effect is that netif_receive_skb events are aggregated
+  into the hash table for only the duration of the wget.  Executing a
+  wget command and then listing the 'hist' file will display the
+  output generated by the wget command:
+
+    $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz
+
+    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
+    # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused]
+
+    { stacktrace:
+         __netif_receive_skb_core+0x46d/0x990
+         __netif_receive_skb+0x18/0x60
+         netif_receive_skb_internal+0x23/0x90
+         napi_gro_receive+0xc8/0x100
+         ieee80211_deliver_skb+0xd6/0x270 [mac80211]
+         ieee80211_rx_handlers+0xccf/0x22f0 [mac80211]
+         ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211]
+         ieee80211_rx+0x31d/0x900 [mac80211]
+         iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm]
+         iwl_rx_dispatch+0x8e/0xf0 [iwldvm]
+         iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi]
+         irq_thread_fn+0x20/0x50
+         irq_thread+0x11f/0x150
+         kthread+0xd2/0xf0
+         ret_from_fork+0x42/0x70
+    } hitcount:         85  len:      28884
+    { stacktrace:
+         __netif_receive_skb_core+0x46d/0x990
+         __netif_receive_skb+0x18/0x60
+         netif_receive_skb_internal+0x23/0x90
+         napi_gro_complete+0xa4/0xe0
+         dev_gro_receive+0x23a/0x360
+         napi_gro_receive+0x30/0x100
+         ieee80211_deliver_skb+0xd6/0x270 [mac80211]
+         ieee80211_rx_handlers+0xccf/0x22f0 [mac80211]
+         ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211]
+         ieee80211_rx+0x31d/0x900 [mac80211]
+         iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm]
+         iwl_rx_dispatch+0x8e/0xf0 [iwldvm]
+         iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi]
+         irq_thread_fn+0x20/0x50
+         irq_thread+0x11f/0x150
+         kthread+0xd2/0xf0
+    } hitcount:         98  len:     664329
+    { stacktrace:
+         __netif_receive_skb_core+0x46d/0x990
+         __netif_receive_skb+0x18/0x60
+         process_backlog+0xa8/0x150
+         net_rx_action+0x15d/0x340
+         __do_softirq+0x114/0x2c0
+         do_softirq_own_stack+0x1c/0x30
+         do_softirq+0x65/0x70
+         __local_bh_enable_ip+0xb5/0xc0
+         ip_finish_output+0x1f4/0x840
+         ip_output+0x6b/0xc0
+         ip_local_out_sk+0x31/0x40
+         ip_send_skb+0x1a/0x50
+         udp_send_skb+0x173/0x2a0
+         udp_sendmsg+0x2bf/0x9f0
+         inet_sendmsg+0x64/0xa0
+         sock_sendmsg+0x3d/0x50
+    } hitcount:        115  len:      13030
+    { stacktrace:
+         __netif_receive_skb_core+0x46d/0x990
+         __netif_receive_skb+0x18/0x60
+         netif_receive_skb_internal+0x23/0x90
+         napi_gro_complete+0xa4/0xe0
+         napi_gro_flush+0x6d/0x90
+         iwl_pcie_irq_handler+0x92a/0x12f0 [iwlwifi]
+         irq_thread_fn+0x20/0x50
+         irq_thread+0x11f/0x150
+         kthread+0xd2/0xf0
+         ret_from_fork+0x42/0x70
+    } hitcount:        934  len:    5512212
+
+    Totals:
+        Hits: 1232
+        Entries: 4
+        Dropped: 0
+
+  The above shows all the netif_receive_skb callpaths and their total
+  lengths for the duration of the wget command.
+
+  The 'clear' hist trigger param can be used to clear the hash table.
+  Suppose we wanted to try another run of the previous example but
+  this time also wanted to see the complete list of events that went
+  into the histogram.  In order to avoid having to set everything up
+  again, we can just clear the histogram first:
+
+    # echo 'hist:key=stacktrace:vals=len:clear' >> \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+
+  Just to verify that it is in fact cleared, here's what we now see in
+  the hist file:
+
+    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
+    # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused]
+
+    Totals:
+        Hits: 0
+        Entries: 0
+        Dropped: 0
+
+  Since we want to see the detailed list of every netif_receive_skb
+  event occurring during the new run, which are in fact the same
+  events being aggregated into the hash table, we add some additional
+  'enable_event' events to the triggering sched_process_exec and
+  sched_process_exit events as such:
+
+    # echo 'enable_event:net:netif_receive_skb if filename==/usr/bin/wget' > \
+           /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
+
+    # echo 'disable_event:net:netif_receive_skb if comm==wget' > \
+           /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
+
+  If you read the trigger files for the sched_process_exec and
+  sched_process_exit triggers, you should see two triggers for each:
+  one enabling/disabling the hist aggregation and the other
+  enabling/disabling the logging of events:
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
+    enable_event:net:netif_receive_skb:unlimited if filename==/usr/bin/wget
+    enable_hist:net:netif_receive_skb:unlimited if filename==/usr/bin/wget
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
+    enable_event:net:netif_receive_skb:unlimited if comm==wget
+    disable_hist:net:netif_receive_skb:unlimited if comm==wget
+
+  In other words, whenever either of the sched_process_exec or
+  sched_process_exit events is hit and matches 'wget', it enables or
+  disables both the histogram and the event log, and what you end up
+  with is a hash table and set of events just covering the specified
+  duration.  Run the wget command again:
+
+    $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz
+
+  Displaying the 'hist' file should show something similar to what you
+  saw in the last run, but this time you should also see the
+  individual events in the trace file:
+
+    # cat /sys/kernel/debug/tracing/trace
+
+    # tracer: nop
+    #
+    # entries-in-buffer/entries-written: 183/1426   #P:4
+    #
+    #                              _-----=> irqs-off
+    #                             / _----=> need-resched
+    #                            | / _---=> hardirq/softirq
+    #                            || / _--=> preempt-depth
+    #                            ||| /     delay
+    #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
+    #              | |       |   ||||       |         |
+                wget-15108 [000] ..s1 31769.606929: netif_receive_skb: dev=lo skbaddr=ffff88009c353100 len=60
+                wget-15108 [000] ..s1 31769.606999: netif_receive_skb: dev=lo skbaddr=ffff88009c353200 len=60
+             dnsmasq-1382  [000] ..s1 31769.677652: netif_receive_skb: dev=lo skbaddr=ffff88009c352b00 len=130
+             dnsmasq-1382  [000] ..s1 31769.685917: netif_receive_skb: dev=lo skbaddr=ffff88009c352200 len=138
+    ##### CPU 2 buffer started ####
+      irq/29-iwlwifi-559   [002] ..s. 31772.031529: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433d00 len=2948
+      irq/29-iwlwifi-559   [002] ..s. 31772.031572: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432200 len=1500
+      irq/29-iwlwifi-559   [002] ..s. 31772.032196: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433100 len=2948
+      irq/29-iwlwifi-559   [002] ..s. 31772.032761: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433000 len=2948
+      irq/29-iwlwifi-559   [002] ..s. 31772.033220: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432e00 len=1500
+    .
+    .
+    .
+
+  The following example demonstrates how multiple hist triggers can be
+  attached to a given event.  This capability can be useful for
+  creating a set of different summaries derived from the same set of
+  events, or for comparing the effects of different filters, among
+  other things.
+
+    # echo 'hist:keys=skbaddr.hex:vals=len if len < 0' >> \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+    # echo 'hist:keys=skbaddr.hex:vals=len if len > 4096' >> \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+    # echo 'hist:keys=skbaddr.hex:vals=len if len == 256' >> \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+    # echo 'hist:keys=skbaddr.hex:vals=len' >> \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+    # echo 'hist:keys=len:vals=common_preempt_count' >> \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+
+  The above set of commands creates four triggers differing only in
+  their filters, along with a completely different though fairly
+  nonsensical trigger.  Note that in order to append multiple hist
+  triggers to the same file, you should use the '>>' operator to
+  append them ('>' will also add the new hist trigger, but will remove
+  any existing hist triggers beforehand).
+
+  Displaying the contents of the 'hist' file for the event shows the
+  contents of all five histograms:
+
+    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
+
+    # event histogram
+    #
+    # trigger info: hist:keys=len:vals=hitcount,common_preempt_count:sort=hitcount:size=2048 [active]
+    #
+
+    { len:        176 } hitcount:          1  common_preempt_count:          0
+    { len:        223 } hitcount:          1  common_preempt_count:          0
+    { len:       4854 } hitcount:          1  common_preempt_count:          0
+    { len:        395 } hitcount:          1  common_preempt_count:          0
+    { len:        177 } hitcount:          1  common_preempt_count:          0
+    { len:        446 } hitcount:          1  common_preempt_count:          0
+    { len:       1601 } hitcount:          1  common_preempt_count:          0
+    .
+    .
+    .
+    { len:       1280 } hitcount:         66  common_preempt_count:          0
+    { len:        116 } hitcount:         81  common_preempt_count:         40
+    { len:        708 } hitcount:        112  common_preempt_count:          0
+    { len:         46 } hitcount:        221  common_preempt_count:          0
+    { len:       1264 } hitcount:        458  common_preempt_count:          0
+
+    Totals:
+        Hits: 1428
+        Entries: 147
+        Dropped: 0
+
+
+    # event histogram
+    #
+    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
+    #
+
+    { skbaddr: ffff8800baee5e00 } hitcount:          1  len:        130
+    { skbaddr: ffff88005f3d5600 } hitcount:          1  len:       1280
+    { skbaddr: ffff88005f3d4900 } hitcount:          1  len:       1280
+    { skbaddr: ffff88009fed6300 } hitcount:          1  len:        115
+    { skbaddr: ffff88009fe0ad00 } hitcount:          1  len:        115
+    { skbaddr: ffff88008cdb1900 } hitcount:          1  len:         46
+    { skbaddr: ffff880064b5ef00 } hitcount:          1  len:        118
+    { skbaddr: ffff880044e3c700 } hitcount:          1  len:         60
+    { skbaddr: ffff880100065900 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d46bd500 } hitcount:          1  len:        116
+    { skbaddr: ffff88005f3d5f00 } hitcount:          1  len:       1280
+    { skbaddr: ffff880100064700 } hitcount:          1  len:        365
+    { skbaddr: ffff8800badb6f00 } hitcount:          1  len:         60
+    .
+    .
+    .
+    { skbaddr: ffff88009fe0be00 } hitcount:         27  len:      24677
+    { skbaddr: ffff88009fe0a400 } hitcount:         27  len:      23052
+    { skbaddr: ffff88009fe0b700 } hitcount:         31  len:      25589
+    { skbaddr: ffff88009fe0b600 } hitcount:         32  len:      27326
+    { skbaddr: ffff88006a462800 } hitcount:         68  len:      71678
+    { skbaddr: ffff88006a463700 } hitcount:         70  len:      72678
+    { skbaddr: ffff88006a462b00 } hitcount:         71  len:      77589
+    { skbaddr: ffff88006a463600 } hitcount:         73  len:      71307
+    { skbaddr: ffff88006a462200 } hitcount:         81  len:      81032
+
+    Totals:
+        Hits: 1451
+        Entries: 318
+        Dropped: 0
+
+
+    # event histogram
+    #
+    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len == 256 [active]
+    #
+
+
+    Totals:
+        Hits: 0
+        Entries: 0
+        Dropped: 0
+
+
+    # event histogram
+    #
+    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len > 4096 [active]
+    #
+
+    { skbaddr: ffff88009fd2c300 } hitcount:          1  len:       7212
+    { skbaddr: ffff8800d2bcce00 } hitcount:          1  len:       7212
+    { skbaddr: ffff8800d2bcd700 } hitcount:          1  len:       7212
+    { skbaddr: ffff8800d2bcda00 } hitcount:          1  len:      21492
+    { skbaddr: ffff8800ae2e2d00 } hitcount:          1  len:       7212
+    { skbaddr: ffff8800d2bcdb00 } hitcount:          1  len:       7212
+    { skbaddr: ffff88006a4df500 } hitcount:          1  len:       4854
+    { skbaddr: ffff88008ce47b00 } hitcount:          1  len:      18636
+    { skbaddr: ffff8800ae2e2200 } hitcount:          1  len:      12924
+    { skbaddr: ffff88005f3e1000 } hitcount:          1  len:       4356
+    { skbaddr: ffff8800d2bcdc00 } hitcount:          2  len:      24420
+    { skbaddr: ffff8800d2bcc200 } hitcount:          2  len:      12996
+
+    Totals:
+        Hits: 14
+        Entries: 12
+        Dropped: 0
+
+
+    # event histogram
+    #
+    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len < 0 [active]
+    #
+
+
+    Totals:
+        Hits: 0
+        Entries: 0
+        Dropped: 0
+
+  Named triggers can be used to have triggers share a common set of
+  histogram data.  This capability is mostly useful for combining the
+  output of events generated by tracepoints contained inside inline
+  functions, but names can be used in a hist trigger on any event.
+  For example, these two triggers when hit will update the same 'len'
+  field in the shared 'foo' histogram data:
+
+    # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+    # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \
+           /sys/kernel/debug/tracing/events/net/netif_rx/trigger
+
+  You can see that they're updating common histogram data by reading
+  each event's hist files at the same time:
+
+    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist;
+      cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
+
+    # event histogram
+    #
+    # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
+    #
+
+    { skbaddr: ffff88000ad53500 } hitcount:          1  len:         46
+    { skbaddr: ffff8800af5a1500 } hitcount:          1  len:         76
+    { skbaddr: ffff8800d62a1900 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bccb00 } hitcount:          1  len:        468
+    { skbaddr: ffff8800d3c69900 } hitcount:          1  len:         46
+    { skbaddr: ffff88009ff09100 } hitcount:          1  len:         52
+    { skbaddr: ffff88010f13ab00 } hitcount:          1  len:        168
+    { skbaddr: ffff88006a54f400 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bcc500 } hitcount:          1  len:        260
+    { skbaddr: ffff880064505000 } hitcount:          1  len:         46
+    { skbaddr: ffff8800baf24e00 } hitcount:          1  len:         32
+    { skbaddr: ffff88009fe0ad00 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d3edff00 } hitcount:          1  len:         44
+    { skbaddr: ffff88009fe0b400 } hitcount:          1  len:        168
+    { skbaddr: ffff8800a1c55a00 } hitcount:          1  len:         40
+    { skbaddr: ffff8800d2bcd100 } hitcount:          1  len:         40
+    { skbaddr: ffff880064505f00 } hitcount:          1  len:        174
+    { skbaddr: ffff8800a8bff200 } hitcount:          1  len:        160
+    { skbaddr: ffff880044e3cc00 } hitcount:          1  len:         76
+    { skbaddr: ffff8800a8bfe700 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bcdc00 } hitcount:          1  len:         32
+    { skbaddr: ffff8800a1f64800 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bcde00 } hitcount:          1  len:        988
+    { skbaddr: ffff88006a5dea00 } hitcount:          1  len:         46
+    { skbaddr: ffff88002e37a200 } hitcount:          1  len:         44
+    { skbaddr: ffff8800a1f32c00 } hitcount:          2  len:        676
+    { skbaddr: ffff88000ad52600 } hitcount:          2  len:        107
+    { skbaddr: ffff8800a1f91e00 } hitcount:          2  len:         92
+    { skbaddr: ffff8800af5a0200 } hitcount:          2  len:        142
+    { skbaddr: ffff8800d2bcc600 } hitcount:          2  len:        220
+    { skbaddr: ffff8800ba36f500 } hitcount:          2  len:         92
+    { skbaddr: ffff8800d021f800 } hitcount:          2  len:         92
+    { skbaddr: ffff8800a1f33600 } hitcount:          2  len:        675
+    { skbaddr: ffff8800a8bfff00 } hitcount:          3  len:        138
+    { skbaddr: ffff8800d62a1300 } hitcount:          3  len:        138
+    { skbaddr: ffff88002e37a100 } hitcount:          4  len:        184
+    { skbaddr: ffff880064504400 } hitcount:          4  len:        184
+    { skbaddr: ffff8800a8bfec00 } hitcount:          4  len:        184
+    { skbaddr: ffff88000ad53700 } hitcount:          5  len:        230
+    { skbaddr: ffff8800d2bcdb00 } hitcount:          5  len:        196
+    { skbaddr: ffff8800a1f90000 } hitcount:          6  len:        276
+    { skbaddr: ffff88006a54f900 } hitcount:          6  len:        276
+
+    Totals:
+        Hits: 81
+        Entries: 42
+        Dropped: 0
+    # event histogram
+    #
+    # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
+    #
+
+    { skbaddr: ffff88000ad53500 } hitcount:          1  len:         46
+    { skbaddr: ffff8800af5a1500 } hitcount:          1  len:         76
+    { skbaddr: ffff8800d62a1900 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bccb00 } hitcount:          1  len:        468
+    { skbaddr: ffff8800d3c69900 } hitcount:          1  len:         46
+    { skbaddr: ffff88009ff09100 } hitcount:          1  len:         52
+    { skbaddr: ffff88010f13ab00 } hitcount:          1  len:        168
+    { skbaddr: ffff88006a54f400 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bcc500 } hitcount:          1  len:        260
+    { skbaddr: ffff880064505000 } hitcount:          1  len:         46
+    { skbaddr: ffff8800baf24e00 } hitcount:          1  len:         32
+    { skbaddr: ffff88009fe0ad00 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d3edff00 } hitcount:          1  len:         44
+    { skbaddr: ffff88009fe0b400 } hitcount:          1  len:        168
+    { skbaddr: ffff8800a1c55a00 } hitcount:          1  len:         40
+    { skbaddr: ffff8800d2bcd100 } hitcount:          1  len:         40
+    { skbaddr: ffff880064505f00 } hitcount:          1  len:        174
+    { skbaddr: ffff8800a8bff200 } hitcount:          1  len:        160
+    { skbaddr: ffff880044e3cc00 } hitcount:          1  len:         76
+    { skbaddr: ffff8800a8bfe700 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bcdc00 } hitcount:          1  len:         32
+    { skbaddr: ffff8800a1f64800 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bcde00 } hitcount:          1  len:        988
+    { skbaddr: ffff88006a5dea00 } hitcount:          1  len:         46
+    { skbaddr: ffff88002e37a200 } hitcount:          1  len:         44
+    { skbaddr: ffff8800a1f32c00 } hitcount:          2  len:        676
+    { skbaddr: ffff88000ad52600 } hitcount:          2  len:        107
+    { skbaddr: ffff8800a1f91e00 } hitcount:          2  len:         92
+    { skbaddr: ffff8800af5a0200 } hitcount:          2  len:        142
+    { skbaddr: ffff8800d2bcc600 } hitcount:          2  len:        220
+    { skbaddr: ffff8800ba36f500 } hitcount:          2  len:         92
+    { skbaddr: ffff8800d021f800 } hitcount:          2  len:         92
+    { skbaddr: ffff8800a1f33600 } hitcount:          2  len:        675
+    { skbaddr: ffff8800a8bfff00 } hitcount:          3  len:        138
+    { skbaddr: ffff8800d62a1300 } hitcount:          3  len:        138
+    { skbaddr: ffff88002e37a100 } hitcount:          4  len:        184
+    { skbaddr: ffff880064504400 } hitcount:          4  len:        184
+    { skbaddr: ffff8800a8bfec00 } hitcount:          4  len:        184
+    { skbaddr: ffff88000ad53700 } hitcount:          5  len:        230
+    { skbaddr: ffff8800d2bcdb00 } hitcount:          5  len:        196
+    { skbaddr: ffff8800a1f90000 } hitcount:          6  len:        276
+    { skbaddr: ffff88006a54f900 } hitcount:          6  len:        276
+
+    Totals:
+        Hits: 81
+        Entries: 42
+        Dropped: 0
+
+  And here's an example that shows how to combine histogram data from
+  any two events even if they don't share any 'compatible' fields
+  other than 'hitcount' and 'stacktrace'.  These commands create a
+  couple of triggers named 'bar' using those fields:
+
+    # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \
+           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
+    # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \
+          /sys/kernel/debug/tracing/events/net/netif_rx/trigger
+
+  And displaying the output of either shows some interesting if
+  somewhat confusing output:
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
+    # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
+
+    # event histogram
+    #
+    # trigger info: hist:name=bar:keys=stacktrace:vals=hitcount:sort=hitcount:size=2048 [active]
+    #
+
+    { stacktrace:
+             _do_fork+0x18e/0x330
+             kernel_thread+0x29/0x30
+             kthreadd+0x154/0x1b0
+             ret_from_fork+0x3f/0x70
+    } hitcount:          1
+    { stacktrace:
+             netif_rx_internal+0xb2/0xd0
+             netif_rx_ni+0x20/0x70
+             dev_loopback_xmit+0xaa/0xd0
+             ip_mc_output+0x126/0x240
+             ip_local_out_sk+0x31/0x40
+             igmp_send_report+0x1e9/0x230
+             igmp_timer_expire+0xe9/0x120
+             call_timer_fn+0x39/0xf0
+             run_timer_softirq+0x1e1/0x290
+             __do_softirq+0xfd/0x290
+             irq_exit+0x98/0xb0
+             smp_apic_timer_interrupt+0x4a/0x60
+             apic_timer_interrupt+0x6d/0x80
+             cpuidle_enter+0x17/0x20
+             call_cpuidle+0x3b/0x60
+             cpu_startup_entry+0x22d/0x310
+    } hitcount:          1
+    { stacktrace:
+             netif_rx_internal+0xb2/0xd0
+             netif_rx_ni+0x20/0x70
+             dev_loopback_xmit+0xaa/0xd0
+             ip_mc_output+0x17f/0x240
+             ip_local_out_sk+0x31/0x40
+             ip_send_skb+0x1a/0x50
+             udp_send_skb+0x13e/0x270
+             udp_sendmsg+0x2bf/0x980
+             inet_sendmsg+0x67/0xa0
+             sock_sendmsg+0x38/0x50
+             SYSC_sendto+0xef/0x170
+             SyS_sendto+0xe/0x10
+             entry_SYSCALL_64_fastpath+0x12/0x6a
+    } hitcount:          2
+    { stacktrace:
+             netif_rx_internal+0xb2/0xd0
+             netif_rx+0x1c/0x60
+             loopback_xmit+0x6c/0xb0
+             dev_hard_start_xmit+0x219/0x3a0
+             __dev_queue_xmit+0x415/0x4f0
+             dev_queue_xmit_sk+0x13/0x20
+             ip_finish_output2+0x237/0x340
+             ip_finish_output+0x113/0x1d0
+             ip_output+0x66/0xc0
+             ip_local_out_sk+0x31/0x40
+             ip_send_skb+0x1a/0x50
+             udp_send_skb+0x16d/0x270
+             udp_sendmsg+0x2bf/0x980
+             inet_sendmsg+0x67/0xa0
+             sock_sendmsg+0x38/0x50
+             ___sys_sendmsg+0x14e/0x270
+    } hitcount:         76
+    { stacktrace:
+             netif_rx_internal+0xb2/0xd0
+             netif_rx+0x1c/0x60
+             loopback_xmit+0x6c/0xb0
+             dev_hard_start_xmit+0x219/0x3a0
+             __dev_queue_xmit+0x415/0x4f0
+             dev_queue_xmit_sk+0x13/0x20
+             ip_finish_output2+0x237/0x340
+             ip_finish_output+0x113/0x1d0
+             ip_output+0x66/0xc0
+             ip_local_out_sk+0x31/0x40
+             ip_send_skb+0x1a/0x50
+             udp_send_skb+0x16d/0x270
+             udp_sendmsg+0x2bf/0x980
+             inet_sendmsg+0x67/0xa0
+             sock_sendmsg+0x38/0x50
+             ___sys_sendmsg+0x269/0x270
+    } hitcount:         77
+    { stacktrace:
+             netif_rx_internal+0xb2/0xd0
+             netif_rx+0x1c/0x60
+             loopback_xmit+0x6c/0xb0
+             dev_hard_start_xmit+0x219/0x3a0
+             __dev_queue_xmit+0x415/0x4f0
+             dev_queue_xmit_sk+0x13/0x20
+             ip_finish_output2+0x237/0x340
+             ip_finish_output+0x113/0x1d0
+             ip_output+0x66/0xc0
+             ip_local_out_sk+0x31/0x40
+             ip_send_skb+0x1a/0x50
+             udp_send_skb+0x16d/0x270
+             udp_sendmsg+0x2bf/0x980
+             inet_sendmsg+0x67/0xa0
+             sock_sendmsg+0x38/0x50
+             SYSC_sendto+0xef/0x170
+    } hitcount:         88
+    { stacktrace:
+             _do_fork+0x18e/0x330
+             SyS_clone+0x19/0x20
+             entry_SYSCALL_64_fastpath+0x12/0x6a
+    } hitcount:        244
+
+    Totals:
+        Hits: 489
+        Entries: 7
+        Dropped: 0
+
+
+2.2 Inter-event hist triggers
+-----------------------------
+
+Inter-event hist triggers are hist triggers that combine values from
+one or more other events and create a histogram using that data.  Data
+from an inter-event histogram can in turn become the source for
+further combined histograms, thus providing a chain of related
+histograms, which is important for some applications.
+
+The most important example of an inter-event quantity that can be used
+in this manner is latency, which is simply a difference in timestamps
+between two events.  Although latency is the most important
+inter-event quantity, note that because the support is completely
+general across the trace event subsystem, any event field can be used
+in an inter-event quantity.
+
+An example of a histogram that combines data from other histograms
+into a useful chain would be a 'wakeupswitch latency' histogram that
+combines a 'wakeup latency' histogram and a 'switch latency'
+histogram.
+
+Normally, a hist trigger specification consists of a (possibly
+compound) key along with one or more numeric values, which are
+continually updated sums associated with that key.  A histogram
+specification in this case consists of individual key and value
+specifications that refer to trace event fields associated with a
+single event type.
+
+The inter-event hist trigger extension allows fields from multiple
+events to be referenced and combined into a multi-event histogram
+specification.  In support of this overall goal, a few enabling
+features have been added to the hist trigger support:
+
+  - In order to compute an inter-event quantity, a value from one
+    event needs to be saved and then referenced from another event.  This
+    requires the introduction of support for histogram 'variables'.
+
+  - The computation of inter-event quantities and their combination
+    require some minimal amount of support for applying simple
+    expressions to variables (+ and -).
+
+  - A histogram consisting of inter-event quantities isn't logically a
+    histogram on either event (so having the 'hist' file for either
+    event host the histogram output doesn't really make sense).  To
+    address the idea that the histogram is associated with a
+    combination of events, support is added allowing the creation of
+    'synthetic' events that are events derived from other events.
+    These synthetic events are full-fledged events just like any other
+    and can be used as such, as for instance to create the
+    'combination' histograms mentioned previously.
+
+  - A set of 'actions' can be associated with histogram entries -
+    these can be used to generate the previously mentioned synthetic
+    events, but can also be used for other purposes, such as for
+    example saving context when a 'max' latency has been hit.
+
+  - Trace events don't have a 'timestamp' associated with them, but
+    there is an implicit timestamp saved along with an event in the
+    underlying ftrace ring buffer.  This timestamp is now exposed as a
+    synthetic field named 'common_timestamp' which can be used in
+    histograms as if it were any other event field; it isn't an actual
+    field in the trace format but rather is a synthesized value that
+    nonetheless can be used as if it were an actual field.  By default
+    it is in units of nanoseconds; appending '.usecs' to a
+    common_timestamp field changes the units to microseconds.
+
+A note on inter-event timestamps: If common_timestamp is used in a
+histogram, the trace buffer is automatically switched over to using
+absolute timestamps and the "global" trace clock, in order to avoid
+bogus timestamp differences with other clocks that aren't coherent
+across CPUs.  This can be overridden by specifying one of the other
+trace clocks instead, using the "clock=XXX" hist trigger attribute,
+where XXX is any of the clocks listed in the tracing/trace_clock
+pseudo-file.
+
+These features are described in more detail in the following sections.
+
+2.2.1 Histogram Variables
+-------------------------
+
+Variables are simply named locations used for saving and retrieving
+values between matching events.  A 'matching' event is defined as an
+event that has a matching key - if a variable is saved for a histogram
+entry corresponding to that key, any subsequent event with a matching
+key can access that variable.
+
+A variable's value is normally available to any subsequent event until
+it is set to something else by a subsequent event.  The one exception
+to that rule is that any variable used in an expression is essentially
+'read-once' - once it's used by an expression in a subsequent event,
+it's reset to its 'unset' state, which means it can't be used again
+unless it's set again.  This ensures not only that an event doesn't
+use an uninitialized variable in a calculation, but that that variable
+is used only once and not for any unrelated subsequent match.
+
+The basic syntax for saving a variable is to simply prefix a unique
+variable name not corresponding to any keyword along with an '=' sign
+to any event field.
+
+Either keys or values can be saved and retrieved in this way.  This
+creates a variable named 'ts0' for a histogram entry with the key
+'next_pid':
+
+  # echo 'hist:keys=next_pid:vals=$ts0:ts0=common_timestamp ...' >> \
+       event/trigger
+
+The ts0 variable can be accessed by any subsequent event having the
+same pid as 'next_pid'.
+
+Variable references are formed by prepending the variable name with
+the '$' sign.  Thus for example, the ts0 variable above would be
+referenced as '$ts0' in expressions.
+
+Because 'vals=' is used, the common_timestamp variable value above
+will also be summed as a normal histogram value would (though for a
+timestamp it makes little sense).
+
+The below shows that a key value can also be saved in the same way:
+
+  # echo 'hist:timer_pid=common_pid:key=timer_pid ...' >> event/trigger
+
+If a variable isn't a key variable or prefixed with 'vals=', the
+associated event field will be saved in a variable but won't be summed
+as a value:
+
+  # echo 'hist:keys=next_pid:ts1=common_timestamp ...' >> event/trigger
+
+Multiple variables can be assigned at the same time.  The below would
+result in both ts0 and b being created as variables, with both
+common_timestamp and field1 additionally being summed as values:
+
+  # echo 'hist:keys=pid:vals=$ts0,$b:ts0=common_timestamp,b=field1 ...' >> \
+       event/trigger
+
+Note that variable assignments can appear either preceding or
+following their use.  The command below behaves identically to the
+command above:
+
+  # echo 'hist:keys=pid:ts0=common_timestamp,b=field1:vals=$ts0,$b ...' >> \
+       event/trigger
+
+Any number of variables not bound to a 'vals=' prefix can also be
+assigned by simply separating them with colons.  Below is the same
+thing but without the values being summed in the histogram:
+
+  # echo 'hist:keys=pid:ts0=common_timestamp:b=field1 ...' >> event/trigger
+
+Variables set as above can be referenced and used in expressions on
+another event.
+
+For example, here's how a latency can be calculated:
+
+  # echo 'hist:keys=pid,prio:ts0=common_timestamp ...' >> event1/trigger
+  # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp-$ts0 ...' >> event2/trigger
+
+In the first line above, the event's timestamp is saved into the
+variable ts0.  In the next line, ts0 is subtracted from the second
+event's timestamp to produce the latency, which is then assigned into
+yet another variable, 'wakeup_lat'.  The hist trigger below in turn
+makes use of the wakeup_lat variable to compute a combined latency
+using the same key and variable from yet another event:
+
+  # echo 'hist:key=pid:wakeupswitch_lat=$wakeup_lat+$switchtime_lat ...' >> event3/trigger
+
+2.2.2 Synthetic Events
+----------------------
+
+Synthetic events are user-defined events generated from hist trigger
+variables or fields associated with one or more other events.  Their
+purpose is to provide a mechanism for displaying data spanning
+multiple events consistent with the existing and already familiar
+usage for normal events.
+
+To define a synthetic event, the user writes a simple specification
+consisting of the name of the new event along with one or more
+variables and their types, which can be any valid field type,
+separated by semicolons, to the tracing/synthetic_events file.
+
+For instance, the following creates a new event named 'wakeup_latency'
+with 3 fields: lat, pid, and prio.  Each of those fields is simply a
+variable reference to a variable on another event:
+
+  # echo 'wakeup_latency \
+          u64 lat; \
+          pid_t pid; \
+         int prio' >> \
+         /sys/kernel/debug/tracing/synthetic_events
+
+Reading the tracing/synthetic_events file lists all the currently
+defined synthetic events, in this case the event defined above:
+
+  # cat /sys/kernel/debug/tracing/synthetic_events
+    wakeup_latency u64 lat; pid_t pid; int prio
+
+An existing synthetic event definition can be removed by prepending
+the command that defined it with a '!':
+
+  # echo '!wakeup_latency u64 lat; pid_t pid; int prio' >> \
+    /sys/kernel/debug/tracing/synthetic_events
+
+At this point, there isn't yet an actual 'wakeup_latency' event
+instantiated in the event subsystem - for this to happen, a 'hist
+trigger action' needs to be instantiated and bound to actual fields
+and variables defined on other events (see Section 6.3.3 below).
+
+Once that is done, an event instance is created, and a histogram can
+be defined using it:
+
+  # echo 'hist:keys=pid,prio,lat.log2:sort=pid,lat' >> \
+        /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
+
+The new event is created under the tracing/events/synthetic/ directory
+and looks and behaves just like any other event:
+
+  # ls /sys/kernel/debug/tracing/events/synthetic/wakeup_latency
+        enable  filter  format  hist  id  trigger
+
+Like any other event, once a histogram is enabled for the event, the
+output can be displayed by reading the event's 'hist' file.
+
+2.2.3 Hist trigger 'actions'
+----------------------------
+
+A hist trigger 'action' is a function that's executed whenever a
+histogram entry is added or updated.
+
+The default 'action' if no special function is explicitly specified is
+as it always has been, to simply update the set of values associated
+with an entry.  Some applications, however, may want to perform
+additional actions at that point, such as generate another event, or
+compare and save a maximum.
+
+The following additional actions are available.  To specify an action
+for a given event, simply specify the action between colons in the
+hist trigger specification.
+
+  - onmatch(matching.event).<synthetic_event_name>(param list)
+
+    The 'onmatch(matching.event).<synthetic_event_name>(params)' hist
+    trigger action is invoked whenever an event matches and the
+    histogram entry would be added or updated.  It causes the named
+    synthetic event to be generated with the values given in the
+    'param list'.  The result is the generation of a synthetic event
+    that consists of the values contained in those variables at the
+    time the invoking event was hit.
+
+    The 'param list' consists of one or more parameters which may be
+    either variables or fields defined on either the 'matching.event'
+    or the target event.  The variables or fields specified in the
+    param list may be either fully-qualified or unqualified.  If a
+    variable is specified as unqualified, it must be unique between
+    the two events.  A field name used as a param can be unqualified
+    if it refers to the target event, but must be fully qualified if
+    it refers to the matching event.  A fully-qualified name is of the
+    form 'system.event_name.$var_name' or 'system.event_name.field'.
+
+    The 'matching.event' specification is simply the fully qualified
+    event name of the event that matches the target event for the
+    onmatch() functionality, in the form 'system.event_name'.
+
+    Finally, the number and type of variables/fields in the 'param
+    list' must match the number and types of the fields in the
+    synthetic event being generated.
+
+    As an example the below defines a simple synthetic event and uses
+    a variable defined on the sched_wakeup_new event as a parameter
+    when invoking the synthetic event.  Here we define the synthetic
+    event:
+
+    # echo 'wakeup_new_test pid_t pid' >> \
+           /sys/kernel/debug/tracing/synthetic_events
+
+    # cat /sys/kernel/debug/tracing/synthetic_events
+          wakeup_new_test pid_t pid
+
+    The following hist trigger both defines the missing testpid
+    variable and specifies an onmatch() action that generates a
+    wakeup_new_test synthetic event whenever a sched_wakeup_new event
+    occurs, which because of the 'if comm == "cyclictest"' filter only
+    happens when the executable is cyclictest:
+
+    # echo 'hist:keys=$testpid:testpid=pid:onmatch(sched.sched_wakeup_new).\
+            wakeup_new_test($testpid) if comm=="cyclictest"' >> \
+            /sys/kernel/debug/tracing/events/sched/sched_wakeup_new/trigger
+
+    Creating and displaying a histogram based on those events is now
+    just a matter of using the fields and new synthetic event in the
+    tracing/events/synthetic directory, as usual:
+
+    # echo 'hist:keys=pid:sort=pid' >> \
+           /sys/kernel/debug/tracing/events/synthetic/wakeup_new_test/trigger
+
+    Running 'cyclictest' should cause wakeup_new events to generate
+    wakeup_new_test synthetic events which should result in histogram
+    output in the wakeup_new_test event's hist file:
+
+    # cat /sys/kernel/debug/tracing/events/synthetic/wakeup_new_test/hist
+
+    A more typical usage would be to use two events to calculate a
+    latency.  The following example uses a set of hist triggers to
+    produce a 'wakeup_latency' histogram:
+
+    First, we define a 'wakeup_latency' synthetic event:
+
+    # echo 'wakeup_latency u64 lat; pid_t pid; int prio' >> \
+            /sys/kernel/debug/tracing/synthetic_events
+
+    Next, we specify that whenever we see a sched_waking event for a
+    cyclictest thread, save the timestamp in a 'ts0' variable:
+
+    # echo 'hist:keys=$saved_pid:saved_pid=pid:ts0=common_timestamp.usecs \
+            if comm=="cyclictest"' >> \
+           /sys/kernel/debug/tracing/events/sched/sched_waking/trigger
+
+    Then, when the corresponding thread is actually scheduled onto the
+    CPU by a sched_switch event, calculate the latency and use that
+    along with another variable and an event field to generate a
+    wakeup_latency synthetic event:
+
+    # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:\
+            onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,\
+                   $saved_pid,next_prio) if next_comm=="cyclictest"' >> \
+           /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
+
+    We also need to create a histogram on the wakeup_latency synthetic
+    event in order to aggregate the generated synthetic event data:
+
+    # echo 'hist:keys=pid,prio,lat:sort=pid,lat' >> \
+            /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
+
+    Finally, once we've run cyclictest to actually generate some
+    events, we can see the output by looking at the wakeup_latency
+    synthetic event's hist file:
+
+    # cat /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/hist
+
+  - onmax(var).save(field,...)
+
+    The 'onmax(var).save(field,...)' hist trigger action is invoked
+    whenever the value of 'var' associated with a histogram entry
+    exceeds the current maximum contained in that variable.
+
+    The end result is that the trace event fields specified as the
+    onmax.save() params will be saved if 'var' exceeds the current
+    maximum for that hist trigger entry.  This allows context from the
+    event that exhibited the new maximum to be saved for later
+    reference.  When the histogram is displayed, additional fields
+    displaying the saved values will be printed.
+
+    As an example the below defines a couple of hist triggers, one for
+    sched_waking and another for sched_switch, keyed on pid.  Whenever
+    a sched_waking occurs, the timestamp is saved in the entry
+    corresponding to the current pid, and when the scheduler switches
+    back to that pid, the timestamp difference is calculated.  If the
+    resulting latency, stored in wakeup_lat, exceeds the current
+    maximum latency, the values specified in the save() fields are
+    recorded:
+
+    # echo 'hist:keys=pid:ts0=common_timestamp.usecs \
+            if comm=="cyclictest"' >> \
+            /sys/kernel/debug/tracing/events/sched/sched_waking/trigger
+
+    # echo 'hist:keys=next_pid:\
+            wakeup_lat=common_timestamp.usecs-$ts0:\
+            onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) \
+            if next_comm=="cyclictest"' >> \
+            /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
+
+    When the histogram is displayed, the max value and the saved
+    values corresponding to the max are displayed following the rest
+    of the fields:
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist
+      { next_pid:       2255 } hitcount:        239
+        common_timestamp-ts0:          0
+        max:         27
+       next_comm: cyclictest
+        prev_pid:          0  prev_prio:        120  prev_comm: swapper/1
+
+      { next_pid:       2256 } hitcount:       2355
+        common_timestamp-ts0: 0
+        max:         49  next_comm: cyclictest
+        prev_pid:          0  prev_prio:        120  prev_comm: swapper/0
+
+      Totals:
+          Hits: 12970
+          Entries: 2
+          Dropped: 0
index 4d3aac9f4a5dcbe86df5d81e41dcc70b82a819a4..2d1d6f69e91bda77cdf3eb4a361c6a57ca8b897c 100644 (file)
 Heterogeneous Memory Management (HMM)
 
-Transparently allow any component of a program to use any memory region of said
-program with a device without using device specific memory allocator. This is
-becoming a requirement to simplify the use of advance heterogeneous computing
-where GPU, DSP or FPGA are use to perform various computations.
-
-This document is divided as follow, in the first section i expose the problems
-related to the use of a device specific allocator. The second section i expose
-the hardware limitations that are inherent to many platforms. The third section
-gives an overview of HMM designs. The fourth section explains how CPU page-
-table mirroring works and what is HMM purpose in this context. Fifth section
-deals with how device memory is represented inside the kernel. Finaly the last
-section present the new migration helper that allow to leverage the device DMA
-engine.
-
-
-1) Problems of using device specific memory allocator:
-2) System bus, device memory characteristics
-3) Share address space and migration
+Provide infrastructure and helpers to integrate non-conventional memory (device
+memory like GPU on board memory) into regular kernel path, with the cornerstone
+of this being specialized struct page for such memory (see sections 5 to 7 of
+this document).
+
+HMM also provides optional helpers for SVM (Share Virtual Memory), i.e.,
+allowing a device to transparently access program address coherently with the
+CPU meaning that any valid pointer on the CPU is also a valid pointer for the
+device. This is becoming mandatory to simplify the use of advanced
+heterogeneous computing where GPU, DSP, or FPGA are used to perform various
+computations on behalf of a process.
+
+This document is divided as follows: in the first section I expose the problems
+related to using device specific memory allocators. In the second section, I
+expose the hardware limitations that are inherent to many platforms. The third
+section gives an overview of the HMM design. The fourth section explains how
+CPU page-table mirroring works and the purpose of HMM in this context. The
+fifth section deals with how device memory is represented inside the kernel.
+Finally, the last section presents a new migration helper that allows
+leveraging the device DMA engine.
+
+
+1) Problems of using a device specific memory allocator:
+2) I/O bus, device memory characteristics
+3) Shared address space and migration
 4) Address space mirroring implementation and API
 5) Represent and manage device memory from core kernel point of view
-6) Migrate to and from device memory
+6) Migration to and from device memory
 7) Memory cgroup (memcg) and rss accounting
 
 
 -------------------------------------------------------------------------------
 
-1) Problems of using device specific memory allocator:
-
-Device with large amount of on board memory (several giga bytes) like GPU have
-historically manage their memory through dedicated driver specific API. This
-creates a disconnect between memory allocated and managed by device driver and
-regular application memory (private anonymous, share memory or regular file
-back memory). From here on i will refer to this aspect as split address space.
-I use share address space to refer to the opposite situation ie one in which
-any memory region can be use by device transparently.
-
-Split address space because device can only access memory allocated through the
-device specific API. This imply that all memory object in a program are not
-equal from device point of view which complicate large program that rely on a
-wide set of libraries.
-
-Concretly this means that code that wants to leverage device like GPU need to
-copy object between genericly allocated memory (malloc, mmap private/share/)
-and memory allocated through the device driver API (this still end up with an
-mmap but of the device file).
-
-For flat dataset (array, grid, image, ...) this isn't too hard to achieve but
-complex data-set (list, tree, ...) are hard to get right. Duplicating a complex
-data-set need to re-map all the pointer relations between each of its elements.
-This is error prone and program gets harder to debug because of the duplicate
-data-set.
-
-Split address space also means that library can not transparently use data they
-are getting from core program or other library and thus each library might have
-to duplicate its input data-set using specific memory allocator. Large project
-suffer from this and waste resources because of the various memory copy.
-
-Duplicating each library API to accept as input or output memory allocted by
+1) Problems of using a device specific memory allocator:
+
+Devices with a large amount of on board memory (several gigabytes) like GPUs
+have historically managed their memory through dedicated driver specific APIs.
+This creates a disconnect between memory allocated and managed by a device
+driver and regular application memory (private anonymous, shared memory, or
+regular file backed memory). From here on I will refer to this aspect as split
+address space. I use shared address space to refer to the opposite situation:
+i.e., one in which any application memory region can be used by a device
+transparently.
+
+Split address space happens because the device can only access memory
+allocated through the device specific API. This implies that all memory
+objects in a program are not equal from the device point of view, which
+complicates large programs that rely on a wide set of libraries.
+
+Concretely this means that code that wants to leverage devices like GPUs needs
+to copy objects between generically allocated memory (malloc, mmap private, mmap
+share) and memory allocated through the device driver API (this still ends up
+with an mmap but of the device file).
+
+For flat data sets (array, grid, image, ...) this isn't too hard to achieve but
+complex data sets (list, tree, ...) are hard to get right. Duplicating a
+complex data set needs to re-map all the pointer relations between each of its
+elements. This is error prone and program gets harder to debug because of the
+duplicate data set and addresses.
+
+Split address space also means that libraries cannot transparently use data
+they are getting from the core program or another library and thus each library
+might have to duplicate its input data set using the device specific memory
+allocator. Large projects suffer from this and waste resources because of the
+various memory copies.
+
+Duplicating each library API to accept as input or output memory allocated by
 each device specific allocator is not a viable option. It would lead to a
-combinatorial explosions in the library entry points.
+combinatorial explosion in the library entry points.
 
-Finaly with the advance of high level language constructs (in C++ but in other
-language too) it is now possible for compiler to leverage GPU or other devices
-without even the programmer knowledge. Some of compiler identified patterns are
-only do-able with a share address. It is as well more reasonable to use a share
-address space for all the other patterns.
+Finally, with the advance of high level language constructs (in C++ but in
+other languages too) it is now possible for the compiler to leverage GPUs and
+other devices without programmer knowledge. Some compiler identified patterns
+are only do-able with a shared address space. It is also more reasonable to use
+a shared address space for all other patterns.
 
 
 -------------------------------------------------------------------------------
 
-2) System bus, device memory characteristics
+2) I/O bus, device memory characteristics
 
-System bus cripple share address due to few limitations. Most system bus only
-allow basic memory access from device to main memory, even cache coherency is
-often optional. Access to device memory from CPU is even more limited, most
-often than not it is not cache coherent.
+I/O buses cripple shared address spaces due to a few limitations. Most I/O
+buses only allow basic memory access from device to main memory; even cache
+coherency is often optional. Access to device memory from CPU is even more
+limited. More often than not, it is not cache coherent.
 
-If we only consider the PCIE bus than device can access main memory (often
-through an IOMMU) and be cache coherent with the CPUs. However it only allows
-a limited set of atomic operation from device on main memory. This is worse
-in the other direction the CPUs can only access a limited range of the device
-memory and can not perform atomic operations on it. Thus device memory can not
-be consider like regular memory from kernel point of view.
+If we only consider the PCIE bus, then a device can access main memory (often
+through an IOMMU) and be cache coherent with the CPUs. However, it only allows
+a limited set of atomic operations from device on main memory. This is worse
+in the other direction: the CPU can only access a limited range of the device
+memory and cannot perform atomic operations on it. Thus device memory cannot
+be considered the same as regular memory from the kernel point of view.
 
 Another crippling factor is the limited bandwidth (~32GBytes/s with PCIE 4.0
-and 16 lanes). This is 33 times less that fastest GPU memory (1 TBytes/s).
-The final limitation is latency, access to main memory from the device has an
-order of magnitude higher latency than when the device access its own memory.
+and 16 lanes). This is 33 times less than the fastest GPU memory (1 TBytes/s).
+The final limitation is latency. Access to main memory from the device has an
+order of magnitude higher latency than when the device accesses its own memory.
 
-Some platform are developing new system bus or additions/modifications to PCIE
-to address some of those limitations (OpenCAPI, CCIX). They mainly allow two
+Some platforms are developing new I/O buses or additions/modifications to PCIE
+to address some of these limitations (OpenCAPI, CCIX). They mainly allow two-
 way cache coherency between CPU and device and allow all atomic operations the
-architecture supports. Saddly not all platform are following this trends and
-some major architecture are left without hardware solutions to those problems.
+architecture supports. Sadly, not all platforms are following this trend and
+some major architectures are left without hardware solutions to these problems.
 
-So for share address space to make sense not only we must allow device to
-access any memory memory but we must also permit any memory to be migrated to
-device memory while device is using it (blocking CPU access while it happens).
+So for shared address space to make sense, not only must we allow devices to
+access any memory but we must also permit any memory to be migrated to device
+memory while device is using it (blocking CPU access while it happens).
 
 
 -------------------------------------------------------------------------------
 
-3) Share address space and migration
+3) Shared address space and migration
 
 HMM intends to provide two main features. First one is to share the address
-space by duplication the CPU page table into the device page table so same
-address point to same memory and this for any valid main memory address in
+space by duplicating the CPU page table in the device page table so the same
+address points to the same physical memory for any valid main memory address in
 the process address space.
 
-To achieve this, HMM offer a set of helpers to populate the device page table
+To achieve this, HMM offers a set of helpers to populate the device page table
 while keeping track of CPU page table updates. Device page table updates are
-not as easy as CPU page table updates. To update the device page table you must
-allow a buffer (or use a pool of pre-allocated buffer) and write GPU specifics
-commands in it to perform the update (unmap, cache invalidations and flush,
-...). This can not be done through common code for all device. Hence why HMM
-provides helpers to factor out everything that can be while leaving the gory
-details to the device driver.
-
-The second mechanism HMM provide is a new kind of ZONE_DEVICE memory that does
-allow to allocate a struct page for each page of the device memory. Those page
-are special because the CPU can not map them. They however allow to migrate
-main memory to device memory using exhisting migration mechanism and everything
-looks like if page was swap out to disk from CPU point of view. Using a struct
-page gives the easiest and cleanest integration with existing mm mechanisms.
-Again here HMM only provide helpers, first to hotplug new ZONE_DEVICE memory
-for the device memory and second to perform migration. Policy decision of what
-and when to migrate things is left to the device driver.
-
-Note that any CPU access to a device page trigger a page fault and a migration
-back to main memory ie when a page backing an given address A is migrated from
-a main memory page to a device page then any CPU access to address A trigger a
-page fault and initiate a migration back to main memory.
-
-
-With this two features, HMM not only allow a device to mirror a process address
-space and keeps both CPU and device page table synchronize, but also allow to
-leverage device memory by migrating part of data-set that is actively use by a
-device.
+not as easy as CPU page table updates. To update the device page table, you must
+allocate a buffer (or use a pool of pre-allocated buffers) and write GPU
+specific commands in it to perform the update (unmap, cache invalidations, and
+flush, ...). This cannot be done through common code for all devices. Hence
+why HMM provides helpers to factor out everything that can be while leaving the
+hardware specific details to the device driver.
+
+The second mechanism HMM provides is a new kind of ZONE_DEVICE memory that
+allows allocating a struct page for each page of the device memory. Those pages
+are special because the CPU cannot map them. However, they allow migrating
+main memory to device memory using existing migration mechanisms and everything
+looks like a page is swapped out to disk from the CPU point of view. Using a
+struct page gives the easiest and cleanest integration with existing mm
+mechanisms. Here again, HMM only provides helpers, first to hotplug new
+ZONE_DEVICE memory for the device memory and second to perform migration.
+Policy decisions of what and when to migrate things are left to the device
+driver.
+
+Note that any CPU access to a device page triggers a page fault and a migration
+back to main memory. For example, when a page backing a given CPU address A is
+migrated from a main memory page to a device page, then any CPU access to
+address A triggers a page fault and initiates a migration back to main memory.
+
+With these two features, HMM not only allows a device to mirror a process
+address space, keeping both CPU and device page tables synchronized, but also
+leverages device memory by migrating the part of the data set that is actively
+being used by the device.
 
 
 -------------------------------------------------------------------------------
 
 4) Address space mirroring implementation and API
 
-Address space mirroring main objective is to allow to duplicate range of CPU
-page table into a device page table and HMM helps keeping both synchronize. A
-device driver that want to mirror a process address space must start with the
+Address space mirroring's main objective is to allow duplication of a range of
+CPU page table into a device page table; HMM helps keep both synchronized. A
+device driver that wants to mirror a process address space must start with the
 registration of an hmm_mirror struct:
 
  int hmm_mirror_register(struct hmm_mirror *mirror,
@@ -154,9 +162,9 @@ registration of an hmm_mirror struct:
  int hmm_mirror_register_locked(struct hmm_mirror *mirror,
                                 struct mm_struct *mm);
 
-The locked variant is to be use when the driver is already holding the mmap_sem
-of the mm in write mode. The mirror struct has a set of callback that are use
-to propagate CPU page table:
+The locked variant is to be used when the driver is already holding mmap_sem
+of the mm in write mode. The mirror struct has a set of callbacks that are used
+to propagate CPU page tables:
 
  struct hmm_mirror_ops {
      /* sync_cpu_device_pagetables() - synchronize page tables
@@ -181,13 +189,13 @@ to propagate CPU page table:
                      unsigned long end);
  };
 
-Device driver must perform update to the range following action (turn range
-read only, or fully unmap, ...). Once driver callback returns the device must
-be done with the update.
+The device driver must perform the update action to the range (mark range
+read only, or fully unmap, ...). The device must be done with the update before
+the driver callback returns.
 
 
-When device driver wants to populate a range of virtual address it can use
-either:
+When the device driver wants to populate a range of virtual addresses, it can
+use either:
  int hmm_vma_get_pfns(struct vm_area_struct *vma,
                       struct hmm_range *range,
                       unsigned long start,
@@ -201,17 +209,19 @@ either:
                    bool write,
                    bool block);
 
-First one (hmm_vma_get_pfns()) will only fetch present CPU page table entry and
-will not trigger a page fault on missing or non present entry. The second one
-do trigger page fault on missing or read only entry if write parameter is true.
-Page fault use the generic mm page fault code path just like a CPU page fault.
+The first one (hmm_vma_get_pfns()) will only fetch present CPU page table
+entries and will not trigger a page fault on missing or non-present entries.
+The second one does trigger a page fault on missing or read-only entry if the
+write parameter is true. Page faults use the generic mm page fault code path
+just like a CPU page fault.
 
-Both function copy CPU page table into their pfns array argument. Each entry in
-that array correspond to an address in the virtual range. HMM provide a set of
-flags to help driver identify special CPU page table entries.
+Both functions copy CPU page table entries into their pfns array argument. Each
+entry in that array corresponds to an address in the virtual range. HMM
+provides a set of flags to help the driver identify special CPU page table
+entries.
 
 Locking with the update() callback is the most important aspect the driver must
-respect in order to keep things properly synchronize. The usage pattern is :
+respect in order to keep things properly synchronized. The usage pattern is:
 
  int driver_populate_range(...)
  {
@@ -233,43 +243,44 @@ respect in order to keep things properly synchronize. The usage pattern is :
       return 0;
  }
 
-The driver->update lock is the same lock that driver takes inside its update()
-callback. That lock must be call before hmm_vma_range_done() to avoid any race
-with a concurrent CPU page table update.
+The driver->update lock is the same lock that the driver takes inside its
+update() callback. That lock must be held before hmm_vma_range_done() to avoid
+any race with a concurrent CPU page table update.
 
-HMM implements all this on top of the mmu_notifier API because we wanted to a
-simpler API and also to be able to perform optimization latter own like doing
-concurrent device update in multi-devices scenario.
+HMM implements all this on top of the mmu_notifier API because we wanted a
+simpler API and also to be able to perform optimizations later on, like doing
+concurrent device updates in multi-device scenarios.
 
-HMM also serve as an impedence missmatch between how CPU page table update are
-done (by CPU write to the page table and TLB flushes) from how device update
-their own page table. Device update is a multi-step process, first appropriate
-commands are write to a buffer, then this buffer is schedule for execution on
-the device. It is only once the device has executed commands in the buffer that
-the update is done. Creating and scheduling update command buffer can happen
-concurrently for multiple devices. Waiting for each device to report commands
-as executed is serialize (there is no point in doing this concurrently).
+HMM also serves as an impedance mismatch between how CPU page table updates
+are done (by CPU write to the page table and TLB flushes) and how devices
+update their own page table. Device updates are a multi-step process. First,
+appropriate commands are written to a buffer, then this buffer is scheduled for
+execution on the device. It is only once the device has executed commands in
+the buffer that the update is done. Creating and scheduling the update command
+buffer can happen concurrently for multiple devices. Waiting for each device to
+report commands as executed is serialized (there is no point in doing this
+concurrently).
 
 
 -------------------------------------------------------------------------------
 
 5) Represent and manage device memory from core kernel point of view
 
-Several differents design were try to support device memory. First one use
-device specific data structure to keep information about migrated memory and
-HMM hooked itself in various place of mm code to handle any access to address
-that were back by device memory. It turns out that this ended up replicating
-most of the fields of struct page and also needed many kernel code path to be
-updated to understand this new kind of memory.
+Several different designs were tried to support device memory. The first one
+used a device specific data structure to keep information about migrated
+memory and HMM hooked itself in various places of mm code to handle any access
+to addresses that were backed by device memory. It turns out that this ended
+up replicating most of the fields of struct page and also needed many kernel
+code paths to be updated to understand this new kind of memory.
 
-Thing is most kernel code path never try to access the memory behind a page
-but only care about struct page contents. Because of this HMM switchted to
-directly using struct page for device memory which left most kernel code path
-un-aware of the difference. We only need to make sure that no one ever try to
-map those page from the CPU side.
+Most kernel code paths never try to access the memory behind a page
+but only care about struct page contents. Because of this, HMM switched to
+directly using struct page for device memory which left most kernel code paths
+unaware of the difference. We only need to make sure that no one ever tries to
+map those pages from the CPU side.
 
-HMM provide a set of helpers to register and hotplug device memory as a new
-region needing struct page. This is offer through a very simple API:
+HMM provides a set of helpers to register and hotplug device memory as a new
+region needing a struct page. This is offered through a very simple API:
 
  struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
                                    struct device *device,
@@ -289,18 +300,19 @@ The hmm_devmem_ops is where most of the important things are:
  };
 
 The first callback (free()) happens when the last reference on a device page is
-drop. This means the device page is now free and no longer use by anyone. The
-second callback happens whenever CPU try to access a device page which it can
-not do. This second callback must trigger a migration back to system memory.
+dropped. This means the device page is now free and no longer used by anyone.
+The second callback happens whenever the CPU tries to access a device page
+which it cannot do. This second callback must trigger a migration back to
+system memory.
 
 
 -------------------------------------------------------------------------------
 
-6) Migrate to and from device memory
+6) Migration to and from device memory
 
-Because CPU can not access device memory, migration must use device DMA engine
-to perform copy from and to device memory. For this we need a new migration
-helper:
+Because the CPU cannot access device memory, migration must use the device DMA
+engine to perform copy from and to device memory. For this we need a new
+migration helper:
 
  int migrate_vma(const struct migrate_vma_ops *ops,
                  struct vm_area_struct *vma,
@@ -311,15 +323,15 @@ helper:
                  unsigned long *dst,
                  void *private);
 
-Unlike other migration function it works on a range of virtual address, there
-is two reasons for that. First device DMA copy has a high setup overhead cost
+Unlike other migration functions it works on a range of virtual address, there
+are two reasons for that. First, device DMA copy has a high setup overhead cost
 and thus batching multiple pages is needed as otherwise the migration overhead
-make the whole excersie pointless. The second reason is because driver trigger
-such migration base on range of address the device is actively accessing.
+makes the whole exercise pointless. The second reason is because the
+migration might be for a range of addresses the device is actively accessing.
 
-The migrate_vma_ops struct define two callbacks. First one (alloc_and_copy())
-control destination memory allocation and copy operation. Second one is there
-to allow device driver to perform cleanup operation after migration.
+The migrate_vma_ops struct defines two callbacks. First one (alloc_and_copy())
+controls destination memory allocation and copy operation. Second one is there
+to allow the device driver to perform cleanup operations after migration.
 
  struct migrate_vma_ops {
      void (*alloc_and_copy)(struct vm_area_struct *vma,
@@ -336,19 +348,19 @@ to allow device driver to perform cleanup operation after migration.
                               void *private);
  };
 
-It is important to stress that this migration helpers allow for hole in the
+It is important to stress that these migration helpers allow for holes in the
 virtual address range. Some pages in the range might not be migrated for all
-the usual reasons (page is pin, page is lock, ...). This helper does not fail
-but just skip over those pages.
+the usual reasons (page is pinned, page is locked, ...). This helper does not
+fail but just skips over those pages.
 
-The alloc_and_copy() might as well decide to not migrate all pages in the
-range (for reasons under the callback control). For those the callback just
-have to leave the corresponding dst entry empty.
+The alloc_and_copy() might decide to not migrate all pages in the
+range (for reasons under the callback control). For those, the callback just
+has to leave the corresponding dst entry empty.
 
-Finaly the migration of the struct page might fails (for file back page) for
+Finally, the migration of the struct page might fail (for file backed page) for
 various reasons (failure to freeze reference, or update page cache, ...). If
-that happens then the finalize_and_map() can catch any pages that was not
-migrated. Note those page were still copied to new page and thus we wasted
+that happens, then the finalize_and_map() can catch any pages that were not
+migrated. Note those pages were still copied to a new page and thus we wasted
 bandwidth but this is considered as a rare event and a price that we are
 willing to pay to keep all the code simpler.
 
@@ -358,27 +370,27 @@ willing to pay to keep all the code simpler.
 7) Memory cgroup (memcg) and rss accounting
 
 For now device memory is accounted as any regular page in rss counters (either
-anonymous if device page is use for anonymous, file if device page is use for
-file back page or shmem if device page is use for share memory). This is a
-deliberate choice to keep existing application that might start using device
-memory without knowing about it to keep runing unimpacted.
-
-Drawbacks is that OOM killer might kill an application using a lot of device
-memory and not a lot of regular system memory and thus not freeing much system
-memory. We want to gather more real world experience on how application and
-system react under memory pressure in the presence of device memory before
+anonymous if device page is used for anonymous, file if device page is used for
+file backed page or shmem if device page is used for shared memory). This is a
+deliberate choice to keep existing applications, that might start using device
+memory without knowing about it, running unimpacted.
+
+A drawback is that the OOM killer might kill an application using a lot of
+device memory and not a lot of regular system memory and thus not freeing much
+system memory. We want to gather more real world experience on how applications
+and systems react under memory pressure in the presence of device memory before
 deciding to account device memory differently.
 
 
-Same decision was made for memory cgroup. Device memory page are accounted
+Same decision was made for memory cgroup. Device memory pages are accounted
 against same memory cgroup a regular page would be accounted to. This does
 simplify migration to and from device memory. This also means that migration
-back from device memory to regular memory can not fail because it would
+back from device memory to regular memory cannot fail because it would
go above memory cgroup limit. We might revisit this choice later on once we
-get more experience in how device memory is use and its impact on memory
+get more experience in how device memory is used and its impact on memory
 resource control.
 
 
-Note that device memory can never be pin nor by device driver nor through GUP
+Note that device memory can never be pinned by a device driver nor through GUP
 and thus such memory is always free upon process exit. Or when last reference
-is drop in case of share memory or file back memory.
+is dropped in case of shared memory or file backed memory.
index 0478ae2ad44a388e006fdf7a5b8e75e4b669c09b..496868072e24ea538ccf290a36b2bb7e96608aa0 100644 (file)
@@ -90,7 +90,7 @@ Steps:
 
 1. Lock the page to be migrated
 
-2. Insure that writeback is complete.
+2. Ensure that writeback is complete.
 
 3. Lock the new page that we want to move to. It is locked so that accesses to
    this (not yet uptodate) page immediately lock while the move is in progress.
@@ -100,8 +100,8 @@ Steps:
    mapcount is not zero then we do not migrate the page. All user space
    processes that attempt to access the page will now wait on the page lock.
 
-5. The radix tree lock is taken. This will cause all processes trying
-   to access the page via the mapping to block on the radix tree spinlock.
+5. The i_pages lock is taken. This will cause all processes trying
+   to access the page via the mapping to block on the spinlock.
 
 6. The refcount of the page is examined and we back out if references remain
    otherwise we know that we are the only one referencing this page.
@@ -114,12 +114,12 @@ Steps:
 
 9. The radix tree is changed to point to the new page.
 
-10. The reference count of the old page is dropped because the radix tree
+10. The reference count of the old page is dropped because the address space
     reference is gone. A reference to the new page is established because
-    the new page is referenced to by the radix tree.
+    the new page is referenced by the address space.
 
-11. The radix tree lock is dropped. With that lookups in the mapping
-    become possible again. Processes will move from spinning on the tree_lock
+11. The i_pages lock is dropped. With that lookups in the mapping
+    become possible again. Processes will move from spinning on the lock
     to sleeping on the locked new page.
 
 12. The page contents are copied to the new page.
index 6d296bdce3280d29946c106df1941a50f809911b..b60179d948bbf79aac8a8ae15f7ff3f91b4f0cce 100644 (file)
@@ -1232,10 +1232,15 @@ F:      Documentation/devicetree/bindings/i2c/i2c-aspeed.txt
 
 ARM/ASPEED MACHINE SUPPORT
 M:     Joel Stanley <joel@jms.id.au>
-S:     Maintained
+R:     Andrew Jeffery <andrew@aj.id.au>
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:     linux-aspeed@lists.ozlabs.org (moderated for non-subscribers)
+Q:     https://patchwork.ozlabs.org/project/linux-aspeed/list/
+S:     Supported
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/joel/aspeed.git
 F:     arch/arm/mach-aspeed/
 F:     arch/arm/boot/dts/aspeed-*
-F:     drivers/*/*aspeed*
+N:     aspeed
 
 ARM/ATMEL AT91 Clock Support
 M:     Boris Brezillon <boris.brezillon@bootlin.com>
@@ -1743,7 +1748,7 @@ F:        arch/arm/mach-orion5x/ts78xx-*
 ARM/OXNAS platform support
 M:     Neil Armstrong <narmstrong@baylibre.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-L:     linux-oxnas@lists.tuxfamily.org (moderated for non-subscribers)
+L:     linux-oxnas@groups.io (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-oxnas/
 F:     arch/arm/boot/dts/ox8*.dts*
@@ -4392,7 +4397,7 @@ S:        Maintained
 F:     drivers/staging/fsl-dpaa2/ethsw
 
 DPT_I2O SCSI RAID DRIVER
-M:     Adaptec OEM Raid Solutions <aacraid@adaptec.com>
+M:     Adaptec OEM Raid Solutions <aacraid@microsemi.com>
 L:     linux-scsi@vger.kernel.org
 W:     http://www.adaptec.com/
 S:     Maintained
@@ -6410,6 +6415,7 @@ L:        linux-mm@kvack.org
 S:     Maintained
 F:     mm/hmm*
 F:     include/linux/hmm*
+F:     Documentation/vm/hmm.txt
 
 HOST AP DRIVER
 M:     Jouni Malinen <j@w1.fi>
@@ -7344,7 +7350,7 @@ F:        include/linux/ipmi*
 F:     include/uapi/linux/ipmi*
 
 IPS SCSI RAID DRIVER
-M:     Adaptec OEM Raid Solutions <aacraid@adaptec.com>
+M:     Adaptec OEM Raid Solutions <aacraid@microsemi.com>
 L:     linux-scsi@vger.kernel.org
 W:     http://www.adaptec.com/
 S:     Maintained
@@ -8048,6 +8054,14 @@ Q:       https://patchwork.kernel.org/project/linux-nvdimm/list/
 S:     Supported
 F:     drivers/nvdimm/pmem*
 
+LIBNVDIMM: DEVICETREE BINDINGS
+M:     Oliver O'Halloran <oohall@gmail.com>
+L:     linux-nvdimm@lists.01.org
+Q:     https://patchwork.kernel.org/project/linux-nvdimm/list/
+S:     Supported
+F:     drivers/nvdimm/of_pmem.c
+F:     Documentation/devicetree/bindings/pmem/pmem-region.txt
+
 LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM
 M:     Dan Williams <dan.j.williams@intel.com>
 L:     linux-nvdimm@lists.01.org
@@ -8851,6 +8865,15 @@ M:       Sean Wang <sean.wang@mediatek.com>
 S:     Maintained
 F:     drivers/media/rc/mtk-cir.c
 
+MEDIATEK DMA DRIVER
+M:     Sean Wang <sean.wang@mediatek.com>
+L:     dmaengine@vger.kernel.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:     linux-mediatek@lists.infradead.org (moderated for non-subscribers)
+S:     Maintained
+F:     Documentation/devicetree/bindings/dma/mtk-*
+F:     drivers/dma/mediatek/
+
 MEDIATEK PMIC LED DRIVER
 M:     Sean Wang <sean.wang@mediatek.com>
 S:     Maintained
@@ -9222,6 +9245,15 @@ S:       Maintained
 F:     drivers/usb/misc/usb251xb.c
 F:     Documentation/devicetree/bindings/usb/usb251xb.txt
 
+MICROSEMI MIPS SOCS
+M:     Alexandre Belloni <alexandre.belloni@bootlin.com>
+L:     linux-mips@linux-mips.org
+S:     Maintained
+F:     arch/mips/generic/board-ocelot.c
+F:     arch/mips/configs/generic/board-ocelot.config
+F:     arch/mips/boot/dts/mscc/
+F:     Documentation/devicetree/bindings/mips/mscc.txt
+
 MICROSEMI SMART ARRAY SMARTPQI DRIVER (smartpqi)
 M:     Don Brace <don.brace@microsemi.com>
 L:     esc.storagedev@microsemi.com
@@ -11736,7 +11768,7 @@ F:      drivers/char/random.c
 
 RAPIDIO SUBSYSTEM
 M:     Matt Porter <mporter@kernel.crashing.org>
-M:     Alexandre Bounine <alexandre.bounine@idt.com>
+M:     Alexandre Bounine <alex.bou9@gmail.com>
 S:     Maintained
 F:     drivers/rapidio/
 
@@ -11810,7 +11842,7 @@ X:      kernel/torture.c
 
 REAL TIME CLOCK (RTC) SUBSYSTEM
 M:     Alessandro Zummo <a.zummo@towertech.it>
-M:     Alexandre Belloni <alexandre.belloni@free-electrons.com>
+M:     Alexandre Belloni <alexandre.belloni@bootlin.com>
 L:     linux-rtc@vger.kernel.org
 Q:     http://patchwork.ozlabs.org/project/rtc-linux/list/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git
@@ -13465,6 +13497,12 @@ S:     Maintained
 F:     drivers/gpio/gpio-dwapb.c
 F:     Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt
 
+SYNOPSYS DESIGNWARE AXI DMAC DRIVER
+M:     Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+S:     Maintained
+F:     drivers/dma/dw-axi-dmac/
+F:     Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt
+
 SYNOPSYS DESIGNWARE DMAC DRIVER
 M:     Viresh Kumar <vireshk@kernel.org>
 R:     Andy Shevchenko <andriy.shevchenko@linux.intel.com>
index 2dbdf59258d9765c5ad5f336635c0fd05e1cb24b..f9d4e6b6d4bd1665d3fe491c98358312dc2c727f 100644 (file)
@@ -32,6 +32,7 @@
 #define MAP_NONBLOCK   0x40000         /* do not block on IO */
 #define MAP_STACK      0x80000         /* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB    0x100000        /* create a huge page mapping */
+#define MAP_FIXED_NOREPLACE    0x200000/* MAP_FIXED which doesn't unmap underlying mapping */
 
 #define MS_ASYNC       1               /* sync memory asynchronously */
 #define MS_SYNC                2               /* synchronous memory sync */
index fbd2897566c389c9e923062afde12e19d141266b..c55d479971ccbc689b67d24edef3403995fca6d3 100644 (file)
                        device_type = "mdio";
                        #address-cells = <1>;
                        #size-cells = <0>;
-                       reg = <0x0 0x2d24000 0x0 0x4000>;
+                       reg = <0x0 0x2d24000 0x0 0x4000>,
+                             <0x0 0x2d10030 0x0 0x4>;
                };
 
                ptp_clock@2d10e00 {
index 74504b154256e36ff4897ed1c7df43eb4d91bdef..869080bedb89f031dd3295702cd1466c919a3113 100644 (file)
@@ -318,10 +318,8 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
 #define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
 extern void flush_kernel_dcache_page(struct page *);
 
-#define flush_dcache_mmap_lock(mapping) \
-       spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
-       spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping)                xa_lock_irq(&mapping->i_pages)
+#define flush_dcache_mmap_unlock(mapping)      xa_unlock_irq(&mapping->i_pages)
 
 #define flush_icache_user_range(vma,page,addr,len) \
        flush_dcache_page(page)
index 4966677036937cf3417fc9eb6477f00710804b47..ed8fd0d19a3e10e7618b78a63169e9c41896760d 100644 (file)
 #include <mach/memory.h>
 #endif
 
-/*
- * Allow for constants defined here to be used from assembly code
- * by prepending the UL suffix only with actual C code compilation.
- */
-#define UL(x) _AC(x, UL)
-
 /* PAGE_OFFSET - the virtual address of the start of the kernel image */
 #define PAGE_OFFSET            UL(CONFIG_PAGE_OFFSET)
 
index 5f7cd88103ef0b5488026d02f85860c666ec1d65..c5f77d854c4faa338afc7693decea54c289dd4b6 100644 (file)
@@ -17,4 +17,6 @@ static const char *const npcm7xx_dt_match[] = {
 DT_MACHINE_START(NPCM7XX_DT, "NPCM7XX Chip family")
        .atag_offset    = 0x100,
        .dt_compat      = npcm7xx_dt_match,
+       .l2c_aux_val    = 0x0,
+       .l2c_aux_mask   = ~0x0,
 MACHINE_END
index ada8eb206a90b6824427d24c5019100895d225f2..8c398fedbbb6af30b461fb09cabd89b04cc09648 100644 (file)
@@ -466,6 +466,12 @@ void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
 void __init dma_contiguous_remap(void)
 {
        int i;
+
+       if (!dma_mmu_remap_num)
+               return;
+
+       /* call flush_cache_all() since CMA area would be large enough */
+       flush_cache_all();
        for (i = 0; i < dma_mmu_remap_num; i++) {
                phys_addr_t start = dma_mmu_remap[i].base;
                phys_addr_t end = start + dma_mmu_remap[i].size;
@@ -498,7 +504,15 @@ void __init dma_contiguous_remap(void)
                flush_tlb_kernel_range(__phys_to_virt(start),
                                       __phys_to_virt(end));
 
-               iotable_init(&map, 1);
+               /*
+                * All the memory in CMA region will be on ZONE_MOVABLE.
+                * If that zone is considered as highmem, the memory in CMA
+                * region is also considered as highmem even if it's
+                * physical address belong to lowmem. In this case,
+                * re-mapping isn't required.
+                */
+               if (!is_highmem_idx(ZONE_MOVABLE))
+                       iotable_init(&map, 1);
        }
 }
 
index eb1de66517d5ead818285aca5ec57fcff40bd704..f866870db749c4bf2b0e5ff03f687cda5569e651 100644 (file)
 #define MIN_GAP (128*1024*1024UL)
 #define MAX_GAP ((TASK_SIZE)/6*5)
 
-static int mmap_is_legacy(void)
+static int mmap_is_legacy(struct rlimit *rlim_stack)
 {
        if (current->personality & ADDR_COMPAT_LAYOUT)
                return 1;
 
-       if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+       if (rlim_stack->rlim_cur == RLIM_INFINITY)
                return 1;
 
        return sysctl_legacy_va_layout;
 }
 
-static unsigned long mmap_base(unsigned long rnd)
+static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
 {
-       unsigned long gap = rlimit(RLIMIT_STACK);
+       unsigned long gap = rlim_stack->rlim_cur;
 
        if (gap < MIN_GAP)
                gap = MIN_GAP;
@@ -180,18 +180,18 @@ unsigned long arch_mmap_rnd(void)
        return rnd << PAGE_SHIFT;
 }
 
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
        unsigned long random_factor = 0UL;
 
        if (current->flags & PF_RANDOMIZE)
                random_factor = arch_mmap_rnd();
 
-       if (mmap_is_legacy()) {
+       if (mmap_is_legacy(rlim_stack)) {
                mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
                mm->get_unmapped_area = arch_get_unmapped_area;
        } else {
-               mm->mmap_base = mmap_base(random_factor);
+               mm->mmap_base = mmap_base(random_factor, rlim_stack);
                mm->get_unmapped_area = arch_get_unmapped_area_topdown;
        }
 }
index 7dfcec4700fef0355372cbf7d4c2f11176c914d6..0094c6653b06b44ac1172688fac240bae37fe24d 100644 (file)
@@ -140,10 +140,8 @@ static inline void __flush_icache_all(void)
        dsb(ish);
 }
 
-#define flush_dcache_mmap_lock(mapping) \
-       spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
-       spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping)                do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)      do { } while (0)
 
 /*
  * We don't appear to need to do anything here.  In fact, if we did, we'd
index 50fa96a497926178a010075701eea807a1b7e99d..49d99214f43c5011f2187fb5bc12c6d4537cd307 100644 (file)
 #include <asm/page-def.h>
 #include <asm/sizes.h>
 
-/*
- * Allow for constants defined here to be used from assembly code
- * by prepending the UL suffix only with actual C code compilation.
- */
-#define UL(x) _AC(x, UL)
-
 /*
  * Size of the PCI I/O space. This must remain a power of two so that
  * IO_SPACE_LIMIT acts as a mask for the low bits of I/O addresses.
index decccffb03cac60abe6e5628e2d0f3a47cc7d099..842c8a5fcd53c0f5573bdf79c072c671c441ae54 100644 (file)
 #define MIN_GAP (SZ_128M)
 #define MAX_GAP        (STACK_TOP/6*5)
 
-static int mmap_is_legacy(void)
+static int mmap_is_legacy(struct rlimit *rlim_stack)
 {
        if (current->personality & ADDR_COMPAT_LAYOUT)
                return 1;
 
-       if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+       if (rlim_stack->rlim_cur == RLIM_INFINITY)
                return 1;
 
        return sysctl_legacy_va_layout;
@@ -62,9 +62,9 @@ unsigned long arch_mmap_rnd(void)
        return rnd << PAGE_SHIFT;
 }
 
-static unsigned long mmap_base(unsigned long rnd)
+static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
 {
-       unsigned long gap = rlimit(RLIMIT_STACK);
+       unsigned long gap = rlim_stack->rlim_cur;
        unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap;
 
        /* Values close to RLIM_INFINITY can overflow. */
@@ -83,7 +83,7 @@ static unsigned long mmap_base(unsigned long rnd)
  * This function, called very early during the creation of a new process VM
  * image, sets up which VM layout function to use:
  */
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
        unsigned long random_factor = 0UL;
 
@@ -94,11 +94,11 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
         * Fall back to the standard layout if the personality bit is set, or
         * if the expected stack growth is unlimited:
         */
-       if (mmap_is_legacy()) {
+       if (mmap_is_legacy(rlim_stack)) {
                mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
                mm->get_unmapped_area = arch_get_unmapped_area;
        } else {
-               mm->mmap_base = mmap_base(random_factor);
+               mm->mmap_base = mmap_base(random_factor, rlim_stack);
                mm->get_unmapped_area = arch_get_unmapped_area_topdown;
        }
 }
index 6f6096ff05a462182797cc75587f70c7d6286a53..6ab942e6c534a01be558b1a1e85511b4f6dacbe9 100644 (file)
@@ -25,6 +25,7 @@ KBUILD_AFLAGS   += -mbig-endian
 LINKFLAGS       += -mbig-endian
 KBUILD_LDFLAGS  += -mbig-endian
 LDFLAGS += -EB
+CHECKFLAGS     += -D_BIG_ENDIAN
 endif
 
 head-y          := arch/c6x/kernel/head.o
index cff57764fcad1fbbfc3bdbbf024045a4ee87398b..0f8fde494875edd371e36ad0315c6435ab3b3915 100644 (file)
@@ -107,7 +107,6 @@ void foo(void)
        /* These would be unneccessary if we ran asm files
         * through the preprocessor.
         */
-       DEFINE(KTHREAD_SIZE, THREAD_SIZE);
        DEFINE(KTHREAD_SHIFT, THREAD_SHIFT);
        DEFINE(KTHREAD_START_SP, THREAD_START_SP);
        DEFINE(ENOSYS_, ENOSYS);
index e8b6cc6a7b5ac4e97f20877e05817e209fff7e91..1ef04b5ab93fa731a6cff93ada07452fbcd9fbce 100644 (file)
@@ -19,6 +19,7 @@
 
 #include <asm/clock.h>
 #include <asm/setup.h>
+#include <asm/special_insns.h>
 #include <asm/irq.h>
 
 /*
index 114b93488193283e4f1801b59d3a4fdf50704ab9..5de871eb4a59176773d1c9fac3b53433a258396a 100644 (file)
@@ -47,9 +47,10 @@ extern int pci_proc_domain(struct pci_bus *bus);
 
 struct vm_area_struct;
 
-/* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */
-#define HAVE_PCI_MMAP          1
-#define arch_can_pci_mmap_io() 1
+/* Tell PCI code what kind of PCI resource mappings we support */
+#define HAVE_PCI_MMAP                  1
+#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1
+#define arch_can_pci_mmap_io()         1
 
 extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val,
                           size_t count);
index e53b8532353c5b8267604005b89327114b8b7d3c..db8b1fa83452c1fec163e068acb723d18b6166d8 100644 (file)
@@ -33,6 +33,8 @@ extern int mem_init_done;
 #define PAGE_KERNEL            __pgprot(0) /* these mean nothing to non MMU */
 
 #define pgprot_noncached(x)    (x)
+#define pgprot_writecombine    pgprot_noncached
+#define pgprot_device          pgprot_noncached
 
 #define __swp_type(x)          (0)
 #define __swp_offset(x)                (0)
index ae79e8638d50ba18410bfeb0cdcc227b5b481dc4..161f9758c631b3ca89da36d0691b951493a98078 100644 (file)
@@ -151,72 +151,22 @@ void pcibios_set_master(struct pci_dev *dev)
 }
 
 /*
- * Platform support for /proc/bus/pci/X/Y mmap()s,
- * modelled on the sparc64 implementation by Dave Miller.
- *  -- paulus.
+ * Platform support for /proc/bus/pci/X/Y mmap()s.
  */
 
-/*
- * Adjust vm_pgoff of VMA such that it is the physical page offset
- * corresponding to the 32-bit pci bus offset for DEV requested by the user.
- *
- * Basically, the user finds the base address for his device which he wishes
- * to mmap.  They read the 32-bit value from the config space base register,
- * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
- * offset parameter of mmap on /proc/bus/pci/XXX for that device.
- *
- * Returns negative error code on failure, zero on success.
- */
-static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
-                                              resource_size_t *offset,
-                                              enum pci_mmap_state mmap_state)
+int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma)
 {
-       struct pci_controller *hose = pci_bus_to_host(dev->bus);
-       unsigned long io_offset = 0;
-       int i, res_bit;
+       struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+       resource_size_t ioaddr = pci_resource_start(pdev, bar);
 
        if (!hose)
-               return NULL;            /* should never happen */
-
-       /* If memory, add on the PCI bridge address offset */
-       if (mmap_state == pci_mmap_mem) {
-#if 0 /* See comment in pci_resource_to_user() for why this is disabled */
-               *offset += hose->pci_mem_offset;
-#endif
-               res_bit = IORESOURCE_MEM;
-       } else {
-               io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
-               *offset += io_offset;
-               res_bit = IORESOURCE_IO;
-       }
-
-       /*
-        * Check that the offset requested corresponds to one of the
-        * resources of the device.
-        */
-       for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
-               struct resource *rp = &dev->resource[i];
-               int flags = rp->flags;
+               return -EINVAL;         /* should never happen */
 
-               /* treat ROM as memory (should be already) */
-               if (i == PCI_ROM_RESOURCE)
-                       flags |= IORESOURCE_MEM;
-
-               /* Active and same type? */
-               if ((flags & res_bit) == 0)
-                       continue;
-
-               /* In the range of this resource? */
-               if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end)
-                       continue;
-
-               /* found it! construct the final physical address */
-               if (mmap_state == pci_mmap_io)
-                       *offset += hose->io_base_phys - io_offset;
-               return rp;
-       }
+       /* Convert to an offset within this PCI controller */
+       ioaddr -= (unsigned long)hose->io_base_virt - _IO_BASE;
 
-       return NULL;
+       vma->vm_pgoff += (ioaddr + hose->io_base_phys) >> PAGE_SHIFT;
+       return 0;
 }
 
 /*
@@ -268,37 +218,6 @@ pgprot_t pci_phys_mem_access_prot(struct file *file,
        return prot;
 }
 
-/*
- * Perform the actual remap of the pages for a PCI device mapping, as
- * appropriate for this architecture.  The region in the process to map
- * is described by vm_start and vm_end members of VMA, the base physical
- * address is found in vm_pgoff.
- * The pci device structure is provided so that architectures may make mapping
- * decisions on a per-device or per-bus basis.
- *
- * Returns a negative error code on failure, zero on success.
- */
-int pci_mmap_page_range(struct pci_dev *dev, int bar, struct vm_area_struct *vma,
-                       enum pci_mmap_state mmap_state, int write_combine)
-{
-       resource_size_t offset =
-               ((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
-       struct resource *rp;
-       int ret;
-
-       rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
-       if (rp == NULL)
-               return -EINVAL;
-
-       vma->vm_pgoff = offset >> PAGE_SHIFT;
-       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-       ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
-                              vma->vm_end - vma->vm_start, vma->vm_page_prot);
-
-       return ret;
-}
-
 /* This provides legacy IO read access on a bus */
 int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
 {
index 61e9a24297b7d6f3ca06104c56d2f261b3e97e9e..225c95da23ce62e8844ff663c30d56f39cd44d15 100644 (file)
@@ -2029,6 +2029,7 @@ config CPU_MIPSR6
        select CPU_HAS_RIXI
        select HAVE_ARCH_BITREVERSE
        select MIPS_ASID_BITS_VARIABLE
+       select MIPS_CRC_SUPPORT
        select MIPS_SPRAM
 
 config EVA
@@ -2502,6 +2503,9 @@ config MIPS_ASID_BITS
 config MIPS_ASID_BITS_VARIABLE
        bool
 
+config MIPS_CRC_SUPPORT
+       bool
+
 #
 # - Highmem only makes sense for the 32-bit kernel.
 # - The current highmem code will only work properly on physically indexed
@@ -2850,8 +2854,7 @@ config CRASH_DUMP
 
 config PHYSICAL_START
        hex "Physical address where the kernel is loaded"
-       default "0xffffffff84000000" if 64BIT
-       default "0x84000000" if 32BIT
+       default "0xffffffff84000000"
        depends on CRASH_DUMP
        help
          This gives the CKSEG0 or KSEG0 address where the kernel is loaded.
index d1ca839c3981e5c7db6d42affe939f2fc0d8dbb0..5e9fce076ab6896bbcd7faf7c4e30611b6e65f5e 100644 (file)
@@ -222,6 +222,8 @@ xpa-cflags-y                                := $(mips-cflags)
 xpa-cflags-$(micromips-ase)            += -mmicromips -Wa$(comma)-fatal-warnings
 toolchain-xpa                          := $(call cc-option-yn,$(xpa-cflags-y) -mxpa)
 cflags-$(toolchain-xpa)                        += -DTOOLCHAIN_SUPPORTS_XPA
+toolchain-crc                          := $(call cc-option-yn,$(mips-cflags) -Wa$(comma)-mcrc)
+cflags-$(toolchain-crc)                        += -DTOOLCHAIN_SUPPORTS_CRC
 
 #
 # Firmware support
@@ -249,20 +251,12 @@ ifdef CONFIG_PHYSICAL_START
 load-y                                 = $(CONFIG_PHYSICAL_START)
 endif
 
-entry-noisa-y                          = 0x$(shell $(NM) vmlinux 2>/dev/null \
-                                       | grep "\bkernel_entry\b" | cut -f1 -d \ )
-ifdef CONFIG_CPU_MICROMIPS
-  #
-  # Set the ISA bit, since the kernel_entry symbol in the ELF will have it
-  # clear which would lead to images containing addresses which bootloaders may
-  # jump to as MIPS32 code.
-  #
-  entry-y = $(patsubst %0,%1,$(patsubst %2,%3,$(patsubst %4,%5, \
-              $(patsubst %6,%7,$(patsubst %8,%9,$(patsubst %a,%b, \
-              $(patsubst %c,%d,$(patsubst %e,%f,$(entry-noisa-y)))))))))
-else
-  entry-y = $(entry-noisa-y)
-endif
+# Sign-extend the entry point to 64 bits if retrieved as a 32-bit number.
+entry-y                = $(shell $(OBJDUMP) -f vmlinux 2>/dev/null \
+                       | sed -n '/^start address / { \
+                               s/^.* //; \
+                               s/0x\([0-7].......\)$$/0x00000000\1/; \
+                               s/0x\(........\)$$/0xffffffff\1/; p }')
 
 cflags-y                       += -I$(srctree)/arch/mips/include/asm/mach-generic
 drivers-$(CONFIG_PCI)          += arch/mips/pci/
@@ -330,6 +324,7 @@ libs-y                      += arch/mips/math-emu/
 # See arch/mips/Kbuild for content of core part of the kernel
 core-y += arch/mips/
 
+drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/
 drivers-$(CONFIG_OPROFILE)     += arch/mips/oprofile/
 
 # suspend and hibernation support
@@ -473,6 +468,21 @@ define archhelp
        echo
        echo '  {micro32,32,64}{r1,r2,r6}{el,}_defconfig <BOARDS="list of boards">'
        echo
+       echo '  Where BOARDS is some subset of the following:'
+       for board in $(sort $(BOARDS)); do echo "    $${board}"; done
+       echo
+       echo '  Specifically the following generic default configurations are'
+       echo '  supported:'
+       echo
+       $(foreach cfg,$(generic_defconfigs),
+         printf "  %-24s - Build generic kernel for $(call describe_generic_defconfig,$(cfg))\n" $(cfg);)
+       echo
+       echo '  The following legacy default configurations have been converted to'
+       echo '  generic and can still be used:'
+       echo
+       $(foreach cfg,$(sort $(legacy_defconfigs)),
+         printf "  %-24s - Build $($(cfg)-y)\n" $(cfg);)
+       echo
        echo '  Otherwise, the following default configurations are available:'
 endef
 
@@ -507,6 +517,10 @@ endef
 $(eval $(call gen_generic_defconfigs,32 64,r1 r2 r6,eb el))
 $(eval $(call gen_generic_defconfigs,micro32,r2,eb el))
 
+define describe_generic_defconfig
+$(subst 32r,MIPS32 r,$(subst 64r,MIPS64 r,$(subst el, little endian,$(patsubst %_defconfig,%,$(1)))))
+endef
+
 .PHONY: $(generic_defconfigs)
 $(generic_defconfigs):
        $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \
@@ -543,14 +557,18 @@ generic_defconfig:
 # now that the boards have been converted to use the generic kernel they are
 # wrappers around the generic rules above.
 #
-.PHONY: sead3_defconfig
-sead3_defconfig:
-       $(Q)$(MAKE) -f $(srctree)/Makefile 32r2el_defconfig BOARDS=sead-3
+legacy_defconfigs              += ocelot_defconfig
+ocelot_defconfig-y             := 32r2el_defconfig BOARDS=ocelot
+
+legacy_defconfigs              += sead3_defconfig
+sead3_defconfig-y              := 32r2el_defconfig BOARDS=sead-3
+
+legacy_defconfigs              += sead3micro_defconfig
+sead3micro_defconfig-y         := micro32r2el_defconfig BOARDS=sead-3
 
-.PHONY: sead3micro_defconfig
-sead3micro_defconfig:
-       $(Q)$(MAKE) -f $(srctree)/Makefile micro32r2el_defconfig BOARDS=sead-3
+legacy_defconfigs              += xilfpga_defconfig
+xilfpga_defconfig-y            := 32r2el_defconfig BOARDS=xilfpga
 
-.PHONY: xilfpga_defconfig
-xilfpga_defconfig:
-       $(Q)$(MAKE) -f $(srctree)/Makefile 32r2el_defconfig BOARDS=xilfpga
+.PHONY: $(legacy_defconfigs)
+$(legacy_defconfigs):
+       $(Q)$(MAKE) -f $(srctree)/Makefile $($@-y)
index 328d697e72b49b505c08d5d4205687632f4ef0b1..4e79dbd54a339143e3a3810ec45b0ee5fb734f2d 100644 (file)
@@ -190,7 +190,7 @@ static struct platform_device gpr_mtd_device = {
 /*
  * LEDs
  */
-static struct gpio_led gpr_gpio_leds[] = {
+static const struct gpio_led gpr_gpio_leds[] = {
        {       /* green */
                .name                   = "gpr:green",
                .gpio                   = 4,
index 85bb75669b0d59797a626f40610292f2f60e0bb2..aab55aaf3d62a10c5e9882f4fe56f6a32c9d2a39 100644 (file)
@@ -145,7 +145,7 @@ static struct platform_device mtx1_wdt = {
        .resource = mtx1_wdt_res,
 };
 
-static struct gpio_led default_leds[] = {
+static const struct gpio_led default_leds[] = {
        {
                .name   = "mtx1:green",
                .gpio = 211,
index e1675c25d5d48cb7c9da6f30983383d54a64251f..f09262e0a72f386c91aba072589c9f03a56ae40e 100644 (file)
@@ -346,7 +346,7 @@ static struct platform_device ar7_udc = {
 /*****************************************************************************
  * LEDs
  ****************************************************************************/
-static struct gpio_led default_leds[] = {
+static const struct gpio_led default_leds[] = {
        {
                .name                   = "status",
                .gpio                   = 8,
@@ -354,12 +354,12 @@ static struct gpio_led default_leds[] = {
        },
 };
 
-static struct gpio_led titan_leds[] = {
+static const struct gpio_led titan_leds[] = {
        { .name = "status", .gpio = 8, .active_low = 1, },
        { .name = "wifi", .gpio = 13, .active_low = 1, },
 };
 
-static struct gpio_led dsl502t_leds[] = {
+static const struct gpio_led dsl502t_leds[] = {
        {
                .name                   = "status",
                .gpio                   = 9,
@@ -377,7 +377,7 @@ static struct gpio_led dsl502t_leds[] = {
        },
 };
 
-static struct gpio_led dg834g_leds[] = {
+static const struct gpio_led dg834g_leds[] = {
        {
                .name                   = "ppp",
                .gpio                   = 6,
@@ -406,7 +406,7 @@ static struct gpio_led dg834g_leds[] = {
        },
 };
 
-static struct gpio_led fb_sl_leds[] = {
+static const struct gpio_led fb_sl_leds[] = {
        {
                .name                   = "1",
                .gpio                   = 7,
@@ -433,7 +433,7 @@ static struct gpio_led fb_sl_leds[] = {
        },
 };
 
-static struct gpio_led fb_fon_leds[] = {
+static const struct gpio_led fb_fon_leds[] = {
        {
                .name                   = "1",
                .gpio                   = 8,
@@ -459,7 +459,7 @@ static struct gpio_led fb_fon_leds[] = {
        },
 };
 
-static struct gpio_led gt701_leds[] = {
+static const struct gpio_led gt701_leds[] = {
        {
                .name                   = "inet:green",
                .gpio                   = 13,
index 88a8fb2bbc71a9e9ae1949319f173e80ad61cdfd..88d400d256c416173a41883d2a5c05a809809d17 100644 (file)
@@ -355,7 +355,7 @@ bcm47xx_buttons_luxul_xwr_600_v1[] = {
 
 static const struct gpio_keys_button
 bcm47xx_buttons_luxul_xwr_1750_v1[] = {
-       BCM47XX_GPIO_KEY(14, BTN_TASK),
+       BCM47XX_GPIO_KEY(14, KEY_RESTART),
 };
 
 /* Microsoft */
index 8307a8a0266724f3865783dc10983489f5664749..34a7b3fbdfd9046a1e6f29d5076eecd60d672ef8 100644 (file)
@@ -408,6 +408,12 @@ bcm47xx_leds_luxul_xap_1500_v1[] __initconst = {
        BCM47XX_GPIO_LED_TRIGGER(13, "green", "status", 1, "timer"),
 };
 
+static const struct gpio_led
+bcm47xx_leds_luxul_xap1500_v1_extra[] __initconst = {
+       BCM47XX_GPIO_LED(44, "green", "5ghz", 0, LEDS_GPIO_DEFSTATE_OFF),
+       BCM47XX_GPIO_LED(76, "green", "2ghz", 0, LEDS_GPIO_DEFSTATE_OFF),
+};
+
 static const struct gpio_led
 bcm47xx_leds_luxul_xbr_4400_v1[] __initconst = {
        BCM47XX_GPIO_LED(12, "green", "usb", 0, LEDS_GPIO_DEFSTATE_OFF),
@@ -435,6 +441,11 @@ bcm47xx_leds_luxul_xwr_1750_v1[] __initconst = {
        BCM47XX_GPIO_LED(15, "green", "wps", 0, LEDS_GPIO_DEFSTATE_OFF),
 };
 
+static const struct gpio_led
+bcm47xx_leds_luxul_xwr1750_v1_extra[] __initconst = {
+       BCM47XX_GPIO_LED(76, "green", "2ghz", 0, LEDS_GPIO_DEFSTATE_OFF),
+};
+
 /* Microsoft */
 
 static const struct gpio_led
@@ -528,6 +539,12 @@ static struct gpio_led_platform_data bcm47xx_leds_pdata;
        bcm47xx_leds_pdata.num_leds = ARRAY_SIZE(dev_leds);             \
 } while (0)
 
+static struct gpio_led_platform_data bcm47xx_leds_pdata_extra __initdata = {};
+#define bcm47xx_set_pdata_extra(dev_leds) do {                         \
+       bcm47xx_leds_pdata_extra.leds = dev_leds;                       \
+       bcm47xx_leds_pdata_extra.num_leds = ARRAY_SIZE(dev_leds);       \
+} while (0)
+
 void __init bcm47xx_leds_register(void)
 {
        enum bcm47xx_board board = bcm47xx_board_get();
@@ -705,6 +722,7 @@ void __init bcm47xx_leds_register(void)
                break;
        case BCM47XX_BOARD_LUXUL_XAP_1500_V1:
                bcm47xx_set_pdata(bcm47xx_leds_luxul_xap_1500_v1);
+               bcm47xx_set_pdata_extra(bcm47xx_leds_luxul_xap1500_v1_extra);
                break;
        case BCM47XX_BOARD_LUXUL_XBR_4400_V1:
                bcm47xx_set_pdata(bcm47xx_leds_luxul_xbr_4400_v1);
@@ -717,6 +735,7 @@ void __init bcm47xx_leds_register(void)
                break;
        case BCM47XX_BOARD_LUXUL_XWR_1750_V1:
                bcm47xx_set_pdata(bcm47xx_leds_luxul_xwr_1750_v1);
+               bcm47xx_set_pdata_extra(bcm47xx_leds_luxul_xwr1750_v1_extra);
                break;
 
        case BCM47XX_BOARD_MICROSOFT_MN700:
@@ -760,4 +779,6 @@ void __init bcm47xx_leds_register(void)
        }
 
        gpio_led_register_device(-1, &bcm47xx_leds_pdata);
+       if (bcm47xx_leds_pdata_extra.num_leds)
+               gpio_led_register_device(0, &bcm47xx_leds_pdata_extra);
 }
index e2c6f131c8ebbd5bdd3db36b89ddb44ca1aa4423..1e79cab8e2690c6e912579e4dac579a0ff550944 100644 (file)
@@ -4,6 +4,7 @@ subdir-y        += cavium-octeon
 subdir-y       += img
 subdir-y       += ingenic
 subdir-y       += lantiq
+subdir-y       += mscc
 subdir-y       += mti
 subdir-y       += netlogic
 subdir-y       += ni
index 2f9ef565e5d08976ffd14a13a9b51e5f2de82e01..5bf77b6fcceb949e0ce59df0e0e376aea0aad6b5 100644 (file)
                        status = "disabled";
                };
 
+               watchdog: watchdog@4067e8 {
+                       clocks = <&upg_clk>;
+                       compatible = "brcm,bcm7038-wdt";
+                       reg = <0x4067e8 0x14>;
+                       status = "disabled";
+               };
+
                upg_gio: gpio@406700 {
                        compatible = "brcm,brcmstb-gpio";
                        reg = <0x406700 0x80>;
index 02e426fe6013227c01a1319e2c2f4bb96dc7046e..2afa0dada575becb0e185f6d7295da6b15156838 100644 (file)
                        status = "disabled";
                };
 
+               watchdog: watchdog@4067e8 {
+                       clocks = <&upg_clk>;
+                       compatible = "brcm,bcm7038-wdt";
+                       reg = <0x4067e8 0x14>;
+                       status = "disabled";
+               };
+
                aon_pm_l2_intc: interrupt-controller@408440 {
                        compatible = "brcm,l2-intc";
                        reg = <0x408440 0x30>;
                        brcm,irq-can-wake;
                };
 
+               aon_ctrl: syscon@408000 {
+                       compatible = "brcm,brcmstb-aon-ctrl";
+                       reg = <0x408000 0x100>, <0x408200 0x200>;
+                       reg-names = "aon-ctrl", "aon-sram";
+               };
+
+               timers: timer@4067c0 {
+                       compatible = "brcm,brcmstb-timers";
+                       reg = <0x4067c0 0x40>;
+               };
+
                upg_gio: gpio@406700 {
                        compatible = "brcm,brcmstb-gpio";
                        reg = <0x406700 0x60>;
                        interrupt-names = "mspi_done";
                        status = "disabled";
                };
+
+               waketimer: waketimer@408e80 {
+                       compatible = "brcm,brcmstb-waketimer";
+                       reg = <0x408e80 0x14>;
+                       interrupts = <0x3>;
+                       interrupt-parent = <&aon_pm_l2_intc>;
+                       interrupt-names = "timer";
+                       clocks = <&upg_clk>;
+                       status = "disabled";
+               };
+       };
+
+       memory_controllers {
+               compatible = "simple-bus";
+               ranges = <0x0 0x103b0000 0xa000>;
+               #address-cells = <1>;
+               #size-cells = <1>;
+
+               memory-controller@0 {
+                       compatible = "brcm,brcmstb-memc", "simple-bus";
+                       ranges = <0x0 0x0 0xa000>;
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+
+                       memc-arb@1000 {
+                               compatible = "brcm,brcmstb-memc-arb";
+                               reg = <0x1000 0x248>;
+                       };
+
+                       memc-ddr@2000 {
+                               compatible = "brcm,brcmstb-memc-ddr";
+                               reg = <0x2000 0x300>;
+                       };
+
+                       ddr-phy@6000 {
+                               compatible = "brcm,brcmstb-ddr-phy";
+                               reg = <0x6000 0xc8>;
+                       };
+
+                       shimphy@8000 {
+                               compatible = "brcm,brcmstb-ddr-shimphy";
+                               reg = <0x8000 0x13c>;
+                       };
+               };
        };
 };
index 1089d6ebc84161c8f78ad55f30bbd26f0e8600a5..6375fc77f389b7a5cfcbd0413201a067328aba12 100644 (file)
                        status = "disabled";
                };
 
+               watchdog: watchdog@4066a8 {
+                       clocks = <&upg_clk>;
+                       compatible = "brcm,bcm7038-wdt";
+                       reg = <0x4066a8 0x14>;
+                       status = "disabled";
+               };
+
                aon_pm_l2_intc: interrupt-controller@408240 {
                        compatible = "brcm,l2-intc";
                        reg = <0x408240 0x30>;
                        interrupt-names = "mspi_done";
                        status = "disabled";
                };
+
+               waketimer: waketimer@408e80 {
+                       compatible = "brcm,brcmstb-waketimer";
+                       reg = <0x408e80 0x14>;
+                       interrupts = <0x3>;
+                       interrupt-parent = <&aon_pm_l2_intc>;
+                       interrupt-names = "timer";
+                       clocks = <&upg_clk>;
+                       status = "disabled";
+               };
        };
 };
index 4b87ebec407af0e9e1e567d72e22b26db7d5e6db..a57cacea91cf45ee790121874a4c9d022be7ba46 100644 (file)
                        status = "disabled";
                };
 
+               watchdog: watchdog@4066a8 {
+                       clocks = <&upg_clk>;
+                       compatible = "brcm,bcm7038-wdt";
+                       reg = <0x4066a8 0x14>;
+                       status = "disabled";
+               };
+
                aon_pm_l2_intc: interrupt-controller@408440 {
                        compatible = "brcm,l2-intc";
                        reg = <0x408440 0x30>;
                        brcm,irq-can-wake;
                };
 
+               aon_ctrl: syscon@408000 {
+                       compatible = "brcm,brcmstb-aon-ctrl";
+                       reg = <0x408000 0x100>, <0x408200 0x200>;
+                       reg-names = "aon-ctrl", "aon-sram";
+               };
+
+               timers: timer@406680 {
+                       compatible = "brcm,brcmstb-timers";
+                       reg = <0x406680 0x40>;
+               };
+
                upg_gio: gpio@406500 {
                        compatible = "brcm,brcmstb-gpio";
                        reg = <0x406500 0xa0>;
                        interrupt-names = "mspi_done";
                        status = "disabled";
                };
+
+               waketimer: waketimer@408e80 {
+                       compatible = "brcm,brcmstb-waketimer";
+                       reg = <0x408e80 0x14>;
+                       interrupts = <0x3>;
+                       interrupt-parent = <&aon_pm_l2_intc>;
+                       interrupt-names = "timer";
+                       clocks = <&upg_clk>;
+                       status = "disabled";
+               };
+       };
+
+       memory_controllers {
+               compatible = "simple-bus";
+               ranges = <0x0 0x103b0000 0xa000>;
+               #address-cells = <1>;
+               #size-cells = <1>;
+
+               memory-controller@0 {
+                       compatible = "brcm,brcmstb-memc", "simple-bus";
+                       ranges = <0x0 0x0 0xa000>;
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+
+                       memc-arb@1000 {
+                               compatible = "brcm,brcmstb-memc-arb";
+                               reg = <0x1000 0x248>;
+                       };
+
+                       memc-ddr@2000 {
+                               compatible = "brcm,brcmstb-memc-ddr";
+                               reg = <0x2000 0x300>;
+                       };
+
+                       ddr-phy@6000 {
+                               compatible = "brcm,brcmstb-ddr-phy";
+                               reg = <0x6000 0xc8>;
+                       };
+
+                       shimphy@8000 {
+                               compatible = "brcm,brcmstb-ddr-shimphy";
+                               reg = <0x8000 0x13c>;
+                       };
+               };
        };
 };
index ca657df34b6d9e4a0c8fbaee0238de97c907d0c6..728b9e9f84b814b83bfa3553dac4c24e208affd6 100644 (file)
                        status = "disabled";
                };
 
+               watchdog: watchdog@4066a8 {
+                       clocks = <&upg_clk>;
+                       compatible = "brcm,bcm7038-wdt";
+                       reg = <0x4066a8 0x14>;
+                       status = "disabled";
+               };
+
                aon_pm_l2_intc: interrupt-controller@408440 {
                        compatible = "brcm,l2-intc";
                        reg = <0x408440 0x30>;
                        brcm,irq-can-wake;
                };
 
+               aon_ctrl: syscon@408000 {
+                       compatible = "brcm,brcmstb-aon-ctrl";
+                       reg = <0x408000 0x100>, <0x408200 0x200>;
+                       reg-names = "aon-ctrl", "aon-sram";
+               };
+
+               timers: timer@406680 {
+                       compatible = "brcm,brcmstb-timers";
+                       reg = <0x406680 0x40>;
+               };
+
                upg_gio: gpio@406500 {
                        compatible = "brcm,brcmstb-gpio";
                        reg = <0x406500 0xa0>;
                        interrupt-names = "mspi_done";
                        status = "disabled";
                };
+
+               waketimer: waketimer@408e80 {
+                       compatible = "brcm,brcmstb-waketimer";
+                       reg = <0x408e80 0x14>;
+                       interrupts = <0x3>;
+                       interrupt-parent = <&aon_pm_l2_intc>;
+                       interrupt-names = "timer";
+                       clocks = <&upg_clk>;
+                       status = "disabled";
+               };
+       };
+
+       memory_controllers {
+               compatible = "simple-bus";
+               ranges = <0x0 0x103b0000 0xa000>;
+               #address-cells = <1>;
+               #size-cells = <1>;
+
+               memory-controller@0 {
+                       compatible = "brcm,brcmstb-memc", "simple-bus";
+                       ranges = <0x0 0x0 0xa000>;
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+
+                       memc-arb@1000 {
+                               compatible = "brcm,brcmstb-memc-arb";
+                               reg = <0x1000 0x248>;
+                       };
+
+                       memc-ddr@2000 {
+                               compatible = "brcm,brcmstb-memc-ddr";
+                               reg = <0x2000 0x300>;
+                       };
+
+                       ddr-phy@6000 {
+                               compatible = "brcm,brcmstb-ddr-phy";
+                               reg = <0x6000 0xc8>;
+                       };
+
+                       shimphy@8000 {
+                               compatible = "brcm,brcmstb-ddr-shimphy";
+                               reg = <0x8000 0x13c>;
+                       };
+               };
        };
 };
index d262e11bc3f905945bbc94f43a313b5f347de7cf..9540c27f12e72664780088b92a857dff5bf324ed 100644 (file)
                        status = "disabled";
                };
 
+               watchdog: watchdog@4067e8 {
+                       clocks = <&upg_clk>;
+                       compatible = "brcm,bcm7038-wdt";
+                       reg = <0x4067e8 0x14>;
+                       status = "disabled";
+               };
+
                upg_gio: gpio@406700 {
                        compatible = "brcm,brcmstb-gpio";
                        reg = <0x406700 0x80>;
index e4fb9b6e6dce920b283b7418eae5d9cbb067ef87..410e61ebaf9e5b070f540446ec2e585323142c90 100644 (file)
                        status = "disabled";
                };
 
+               watchdog: watchdog@4067e8 {
+                       clocks = <&upg_clk>;
+                       compatible = "brcm,bcm7038-wdt";
+                       reg = <0x4067e8 0x14>;
+                       status = "disabled";
+               };
+
                aon_pm_l2_intc: interrupt-controller@408440 {
                        compatible = "brcm,l2-intc";
                        reg = <0x408440 0x30>;
                        brcm,irq-can-wake;
                };
 
+               aon_ctrl: syscon@408000 {
+                       compatible = "brcm,brcmstb-aon-ctrl";
+                       reg = <0x408000 0x100>, <0x408200 0x200>;
+                       reg-names = "aon-ctrl", "aon-sram";
+               };
+
+               timers: timer@4067c0 {
+                       compatible = "brcm,brcmstb-timers";
+                       reg = <0x4067c0 0x40>;
+               };
+
                upg_gio: gpio@406700 {
                        compatible = "brcm,brcmstb-gpio";
                        reg = <0x406700 0x80>;
                        interrupt-names = "mspi_done";
                        status = "disabled";
                };
+
+               waketimer: waketimer@409580 {
+                       compatible = "brcm,brcmstb-waketimer";
+                       reg = <0x409580 0x14>;
+                       interrupts = <0x3>;
+                       interrupt-parent = <&aon_pm_l2_intc>;
+                       interrupt-names = "timer";
+                       clocks = <&upg_clk>;
+                       status = "disabled";
+               };
+       };
+
+       memory_controllers {
+               compatible = "simple-bus";
+               ranges = <0x0 0x103b0000 0x1a000>;
+               #address-cells = <1>;
+               #size-cells = <1>;
+
+               memory-controller@0 {
+                       compatible = "brcm,brcmstb-memc", "simple-bus";
+                       ranges = <0x0 0x0 0xa000>;
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+
+                       memc-arb@1000 {
+                               compatible = "brcm,brcmstb-memc-arb";
+                               reg = <0x1000 0x248>;
+                       };
+
+                       memc-ddr@2000 {
+                               compatible = "brcm,brcmstb-memc-ddr";
+                               reg = <0x2000 0x300>;
+                       };
+
+                       ddr-phy@6000 {
+                               compatible = "brcm,brcmstb-ddr-phy";
+                               reg = <0x6000 0xc8>;
+                       };
+
+                       shimphy@8000 {
+                               compatible = "brcm,brcmstb-ddr-shimphy";
+                               reg = <0x8000 0x13c>;
+                       };
+               };
+
+               memory-controller@1 {
+                       compatible = "brcm,brcmstb-memc", "simple-bus";
+                       ranges = <0x0 0x10000 0xa000>;
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+
+                       memc-arb@1000 {
+                               compatible = "brcm,brcmstb-memc-arb";
+                               reg = <0x1000 0x248>;
+                       };
+
+                       memc-ddr@2000 {
+                               compatible = "brcm,brcmstb-memc-ddr";
+                               reg = <0x2000 0x300>;
+                       };
+
+                       ddr-phy@6000 {
+                               compatible = "brcm,brcmstb-ddr-phy";
+                               reg = <0x6000 0xc8>;
+                       };
+
+                       shimphy@8000 {
+                               compatible = "brcm,brcmstb-ddr-shimphy";
+                               reg = <0x8000 0x13c>;
+                       };
+               };
        };
 };
index 1484e8990e52a11364e9622cff7664a164b31582..8398b7f68bf49fcd54deed8425ab4776f0815b72 100644 (file)
                        status = "disabled";
                };
 
+               watchdog: watchdog@4067e8 {
+                       clocks = <&upg_clk>;
+                       compatible = "brcm,bcm7038-wdt";
+                       reg = <0x4067e8 0x14>;
+                       status = "disabled";
+               };
+
                aon_pm_l2_intc: interrupt-controller@408440 {
                        compatible = "brcm,l2-intc";
                        reg = <0x408440 0x30>;
                        brcm,irq-can-wake;
                };
 
+               aon_ctrl: syscon@408000 {
+                       compatible = "brcm,brcmstb-aon-ctrl";
+                       reg = <0x408000 0x100>, <0x408200 0x200>;
+                       reg-names = "aon-ctrl", "aon-sram";
+               };
+
+               timers: timer@4067c0 {
+                       compatible = "brcm,brcmstb-timers";
+                       reg = <0x4067c0 0x40>;
+               };
+
                upg_gio: gpio@406700 {
                        compatible = "brcm,brcmstb-gpio";
                        reg = <0x406700 0x80>;
                        interrupt-names = "mspi_done";
                        status = "disabled";
                };
+
+               waketimer: waketimer@409580 {
+                       compatible = "brcm,brcmstb-waketimer";
+                       reg = <0x409580 0x14>;
+                       interrupts = <0x3>;
+                       interrupt-parent = <&aon_pm_l2_intc>;
+                       interrupt-names = "timer";
+                       clocks = <&upg_clk>;
+                       status = "disabled";
+               };
+       };
+
+       memory_controllers {
+               compatible = "simple-bus";
+               ranges = <0x0 0x103b0000 0x1a000>;
+               #address-cells = <1>;
+               #size-cells = <1>;
+
+               memory-controller@0 {
+                       compatible = "brcm,brcmstb-memc", "simple-bus";
+                       ranges = <0x0 0x0 0xa000>;
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+
+                       memc-arb@1000 {
+                               compatible = "brcm,brcmstb-memc-arb";
+                               reg = <0x1000 0x248>;
+                       };
+
+                       memc-ddr@2000 {
+                               compatible = "brcm,brcmstb-memc-ddr";
+                               reg = <0x2000 0x300>;
+                       };
+
+                       ddr-phy@6000 {
+                               compatible = "brcm,brcmstb-ddr-phy";
+                               reg = <0x6000 0xc8>;
+                       };
+
+                       shimphy@8000 {
+                               compatible = "brcm,brcmstb-ddr-shimphy";
+                               reg = <0x8000 0x13c>;
+                       };
+               };
+
+               memory-controller@1 {
+                       compatible = "brcm,brcmstb-memc", "simple-bus";
+                       ranges = <0x0 0x10000 0xa000>;
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+
+                       memc-arb@1000 {
+                               compatible = "brcm,brcmstb-memc-arb";
+                               reg = <0x1000 0x248>;
+                       };
+
+                       memc-ddr@2000 {
+                               compatible = "brcm,brcmstb-memc-ddr";
+                               reg = <0x2000 0x300>;
+                       };
+
+                       ddr-phy@6000 {
+                               compatible = "brcm,brcmstb-ddr-phy";
+                               reg = <0x6000 0xc8>;
+                       };
+
+                       shimphy@8000 {
+                               compatible = "brcm,brcmstb-ddr-shimphy";
+                               reg = <0x8000 0x13c>;
+                       };
+               };
        };
 };
index 7f59ea2ded6cb3ac37e8357a439baa46aee3428b..79e9769f7e003a728839717dff2c426638e62dfa 100644 (file)
        status = "okay";
 };
 
+&watchdog {
+       status = "okay";
+};
+
 /* FIXME: USB is wonky; disable it for now */
 &ehci0 {
        status = "disabled";
index 9e7d5228f2b7177fe48eb04c4ce984d872b411f0..28370ff77eeb8895c782ed7a919c06ba9f77d89e 100644 (file)
        status = "okay";
 };
 
+&watchdog {
+       status = "okay";
+};
+
 &enet0 {
        status = "okay";
 };
 &mspi {
        status = "okay";
 };
+
+&waketimer {
+       status = "okay";
+};
index 708207a0002d676c75baf367a3d1eedb5b5b41b1..41c1b510c230dd50a698b80e97deee15b6bc598e 100644 (file)
        status = "okay";
 };
 
+&watchdog {
+       status = "okay";
+};
+
 &enet0 {
        status = "okay";
 };
 &mspi {
        status = "okay";
 };
+
+&waketimer {
+       status = "okay";
+};
index 73c6dc9c8c6db9fb55738363b4d5d507c1c26c57..9f6c6c9b7ea7ff10a26d7f28d42ede2eff097ffd 100644 (file)
        status = "okay";
 };
 
+&watchdog {
+       status = "okay";
+};
+
 &enet0 {
        status = "okay";
 };
 &mspi {
        status = "okay";
 };
+
+&waketimer {
+       status = "okay";
+};
index 37bacfdcf9d9df288cdee1a95320a7898721c8a5..df8b755c390febd1d5478a117d55ad2d3eb0ba8c 100644 (file)
        status = "okay";
 };
 
+&watchdog {
+       status = "okay";
+};
+
 &enet0 {
        status = "okay";
 };
@@ -78,3 +82,7 @@
 &mspi {
        status = "okay";
 };
+
+&waketimer {
+       status = "okay";
+};
index f96241e94874315bd5532a9e660d30fd004c6590..086faeaa384a061fa5e746dd56ecdfc68a9777c1 100644 (file)
        status = "okay";
 };
 
+&watchdog {
+       status = "okay";
+};
+
 /* FIXME: MAC driver comes up but cannot attach to PHY */
 &enet0 {
        status = "disabled";
index ce762c7b2e549dbfc0ba33c22dc0ce96b593dbd2..0ed22217bf3a31601b423d1713c55c4cb15354da 100644 (file)
        status = "okay";
 };
 
+&watchdog {
+       status = "okay";
+};
+
 &enet0 {
        status = "okay";
 };
 &mspi {
        status = "okay";
 };
+
+&waketimer {
+       status = "okay";
+};
index d4dd31a543fd7ff87547643017674d0af2fdd90a..2c145a883aefc32791a25de970033cdc4bdaeeef 100644 (file)
        status = "okay";
 };
 
+&watchdog {
+       status = "okay";
+};
+
 &enet0 {
        status = "okay";
 };
 &mspi {
        status = "okay";
 };
+
+&waketimer {
+       status = "okay";
+};
index 2cd49b60e030424b624a8fb26f5ace63997b1ed6..1bd105428f61421eec2f5df8e470e7e056fb8598 100644 (file)
                                        #address-cells = <1>;
                                        #size-cells = <0>;
 
-                                       rtc@0x68 {
+                                       rtc@68 {
                                                compatible = "st,m41t81s";
                                                reg = <0x68>;
                                        };
index a4cc52214dbdbc4060c086ea75d85e3bd116b46e..38078594cf97ed5d7d6c99b8a2a09290aec2aba3 100644 (file)
                                        reg = <0x0 0x0 0x0 0x800000>;
                                };
 
-                               partition@0x800000 {
+                               partition@800000 {
                                        label = "u-boot";
                                        reg = <0x0 0x800000 0x0 0x200000>;
                                };
 
-                               partition@0xa00000 {
+                               partition@a00000 {
                                        label = "u-boot-env";
                                        reg = <0x0 0xa00000 0x0 0x200000>;
                                };
 
-                               partition@0xc00000 {
+                               partition@c00000 {
                                        label = "boot";
                                        reg = <0x0 0xc00000 0x0 0x4000000>;
                                };
 
-                               partition@0x8c00000 {
+                               partition@4c00000 {
                                        label = "system";
                                        reg = <0x0 0x4c00000 0x1 0xfb400000>;
                                };
diff --git a/arch/mips/boot/dts/mscc/Makefile b/arch/mips/boot/dts/mscc/Makefile
new file mode 100644 (file)
index 0000000..c511645
--- /dev/null
@@ -0,0 +1,3 @@
+dtb-$(CONFIG_LEGACY_BOARD_OCELOT)      += ocelot_pcb123.dtb
+
+obj-y                          += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi
new file mode 100644 (file)
index 0000000..dd239ca
--- /dev/null
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2017 Microsemi Corporation */
+
+/ {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       compatible = "mscc,ocelot";
+
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               cpu@0 {
+                       compatible = "mips,mips24KEc";
+                       device_type = "cpu";
+                       clocks = <&cpu_clk>;
+                       reg = <0>;
+               };
+       };
+
+       aliases {
+               serial0 = &uart0;
+       };
+
+       cpuintc: interrupt-controller {
+               #address-cells = <0>;
+               #interrupt-cells = <1>;
+               interrupt-controller;
+               compatible = "mti,cpu-interrupt-controller";
+       };
+
+       cpu_clk: cpu-clock {
+               compatible = "fixed-clock";
+               #clock-cells = <0>;
+               clock-frequency = <500000000>;
+       };
+
+       ahb_clk: ahb-clk {
+               compatible = "fixed-factor-clock";
+               #clock-cells = <0>;
+               clocks = <&cpu_clk>;
+               clock-div = <2>;
+               clock-mult = <1>;
+       };
+
+       ahb@70000000 {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <1>;
+               ranges = <0 0x70000000 0x2000000>;
+
+               interrupt-parent = <&intc>;
+
+               cpu_ctrl: syscon@0 {
+                       compatible = "mscc,ocelot-cpu-syscon", "syscon";
+                       reg = <0x0 0x2c>;
+               };
+
+               intc: interrupt-controller@70 {
+                       compatible = "mscc,ocelot-icpu-intr";
+                       reg = <0x70 0x70>;
+                       #interrupt-cells = <1>;
+                       interrupt-controller;
+                       interrupt-parent = <&cpuintc>;
+                       interrupts = <2>;
+               };
+
+               uart0: serial@100000 {
+                       pinctrl-0 = <&uart_pins>;
+                       pinctrl-names = "default";
+                       compatible = "ns16550a";
+                       reg = <0x100000 0x20>;
+                       interrupts = <6>;
+                       clocks = <&ahb_clk>;
+                       reg-io-width = <4>;
+                       reg-shift = <2>;
+
+                       status = "disabled";
+               };
+
+               uart2: serial@100800 {
+                       pinctrl-0 = <&uart2_pins>;
+                       pinctrl-names = "default";
+                       compatible = "ns16550a";
+                       reg = <0x100800 0x20>;
+                       interrupts = <7>;
+                       clocks = <&ahb_clk>;
+                       reg-io-width = <4>;
+                       reg-shift = <2>;
+
+                       status = "disabled";
+               };
+
+               reset@1070008 {
+                       compatible = "mscc,ocelot-chip-reset";
+                       reg = <0x1070008 0x4>;
+               };
+
+               gpio: pinctrl@1070034 {
+                       compatible = "mscc,ocelot-pinctrl";
+                       reg = <0x1070034 0x68>;
+                       gpio-controller;
+                       #gpio-cells = <2>;
+                       gpio-ranges = <&gpio 0 0 22>;
+
+                       uart_pins: uart-pins {
+                               pins = "GPIO_6", "GPIO_7";
+                               function = "uart";
+                       };
+
+                       uart2_pins: uart2-pins {
+                               pins = "GPIO_12", "GPIO_13";
+                               function = "uart2";
+                       };
+               };
+       };
+};
diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts
new file mode 100644 (file)
index 0000000..29d6414
--- /dev/null
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2017 Microsemi Corporation */
+
+/dts-v1/;
+
+#include "ocelot.dtsi"
+
+/ {
+       compatible = "mscc,ocelot-pcb123", "mscc,ocelot";
+
+       chosen {
+               stdout-path = "serial0:115200n8";
+       };
+
+       memory@0 {
+               device_type = "memory";
+               reg = <0x0 0x0e000000>;
+       };
+};
+
+&uart0 {
+       status = "okay";
+};
+
+&uart2 {
+       status = "okay";
+};
index d99f5242169e7acb31f8cfa71cd6e14d24e94c82..b3aec101a65d4ed96f4f986ea22c2210b297cdef 100644 (file)
@@ -2271,7 +2271,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
 
        parent_irq = irq_of_parse_and_map(ciu_node, 0);
        if (!parent_irq) {
-               pr_err("ERROR: Couldn't acquire parent_irq for %s\n.",
+               pr_err("ERROR: Couldn't acquire parent_irq for %s\n",
                        ciu_node->name);
                return -EINVAL;
        }
@@ -2283,7 +2283,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
 
        addr = of_get_address(ciu_node, 0, NULL, NULL);
        if (!addr) {
-               pr_err("ERROR: Couldn't acquire reg(0) %s\n.", ciu_node->name);
+               pr_err("ERROR: Couldn't acquire reg(0) %s\n", ciu_node->name);
                return -EINVAL;
        }
        host_data->raw_reg = (u64)phys_to_virt(
@@ -2291,7 +2291,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
 
        addr = of_get_address(ciu_node, 1, NULL, NULL);
        if (!addr) {
-               pr_err("ERROR: Couldn't acquire reg(1) %s\n.", ciu_node->name);
+               pr_err("ERROR: Couldn't acquire reg(1) %s\n", ciu_node->name);
                return -EINVAL;
        }
        host_data->en_reg = (u64)phys_to_virt(
@@ -2299,7 +2299,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
 
        r = of_property_read_u32(ciu_node, "cavium,max-bits", &val);
        if (r) {
-               pr_err("ERROR: Couldn't read cavium,max-bits from %s\n.",
+               pr_err("ERROR: Couldn't read cavium,max-bits from %s\n",
                        ciu_node->name);
                return r;
        }
@@ -2309,7 +2309,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
                                           &octeon_irq_domain_cib_ops,
                                           host_data);
        if (!cib_domain) {
-               pr_err("ERROR: Couldn't irq_domain_add_linear()\n.");
+               pr_err("ERROR: Couldn't irq_domain_add_linear()\n");
                return -ENOMEM;
        }
 
index 3cefa6bc01ddf880320209187f574340c5e3996e..47aecb8750e6f88f8a96028b4dab592e1a316a50 100644 (file)
@@ -72,6 +72,7 @@ CONFIG_USB_EHCI_HCD_PLATFORM=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_OHCI_HCD_PLATFORM=y
 CONFIG_USB_STORAGE=y
+CONFIG_SOC_BRCMSTB=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
index ca606e71f4d02611f2b1989013035c1c364d8610..1a5d5ea4ab2b5139ef841b7af9038f965909b3bd 100644 (file)
@@ -1,2 +1,4 @@
 CONFIG_CPU_MIPS32_R6=y
 CONFIG_HIGHMEM=y
+
+CONFIG_CRYPTO_CRC32_MIPS=y
index 7cac0339c4d52db0c419d3409ee411da8d400e1b..5dd8e8503e34b5d03f44d17ea0d553ac8b61f91d 100644 (file)
@@ -2,3 +2,5 @@ CONFIG_CPU_MIPS64_R6=y
 CONFIG_64BIT=y
 CONFIG_MIPS32_O32=y
 CONFIG_MIPS32_N32=y
+
+CONFIG_CRYPTO_CRC32_MIPS=y
diff --git a/arch/mips/configs/generic/board-ocelot.config b/arch/mips/configs/generic/board-ocelot.config
new file mode 100644 (file)
index 0000000..aa81576
--- /dev/null
@@ -0,0 +1,35 @@
+# require CONFIG_CPU_MIPS32_R2=y
+
+CONFIG_LEGACY_BOARD_OCELOT=y
+
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_M25P80=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_PLATFORM=y
+CONFIG_MTD_SPI_NOR=y
+CONFIG_MTD_UBI=y
+
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+
+CONFIG_GPIO_SYSFS=y
+
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MUX=y
+
+CONFIG_SPI=y
+CONFIG_SPI_BITBANG=y
+CONFIG_SPI_DESIGNWARE=y
+CONFIG_SPI_SPIDEV=y
+
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_OCELOT_RESET=y
+
+CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile
new file mode 100644 (file)
index 0000000..e07aca5
--- /dev/null
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for MIPS crypto files.
+#
+
+obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o
diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c
new file mode 100644 (file)
index 0000000..7d1d242
--- /dev/null
@@ -0,0 +1,348 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * crc32-mips.c - CRC32 and CRC32C using optional MIPSr6 instructions
+ *
+ * Module based on arm64/crypto/crc32-arm.c
+ *
+ * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
+ * Copyright (C) 2018 MIPS Tech, LLC
+ */
+
+#include <linux/unaligned/access_ok.h>
+#include <linux/cpufeature.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <asm/mipsregs.h>
+
+#include <crypto/internal/hash.h>
+
+enum crc_op_size {
+       b, h, w, d,
+};
+
+enum crc_type {
+       crc32,
+       crc32c,
+};
+
+#ifndef TOOLCHAIN_SUPPORTS_CRC
+#define _ASM_MACRO_CRC32(OP, SZ, TYPE)                                   \
+_ASM_MACRO_3R(OP, rt, rs, rt2,                                           \
+       ".ifnc  \\rt, \\rt2\n\t"                                          \
+       ".error \"invalid operands \\\"" #OP " \\rt,\\rs,\\rt2\\\"\"\n\t" \
+       ".endif\n\t"                                                      \
+       _ASM_INSN_IF_MIPS(0x7c00000f | (__rt << 16) | (__rs << 21) |      \
+                         ((SZ) <<  6) | ((TYPE) << 8))                   \
+       _ASM_INSN32_IF_MM(0x00000030 | (__rs << 16) | (__rt << 21) |      \
+                         ((SZ) << 14) | ((TYPE) << 3)))
+_ASM_MACRO_CRC32(crc32b,  0, 0);
+_ASM_MACRO_CRC32(crc32h,  1, 0);
+_ASM_MACRO_CRC32(crc32w,  2, 0);
+_ASM_MACRO_CRC32(crc32d,  3, 0);
+_ASM_MACRO_CRC32(crc32cb, 0, 1);
+_ASM_MACRO_CRC32(crc32ch, 1, 1);
+_ASM_MACRO_CRC32(crc32cw, 2, 1);
+_ASM_MACRO_CRC32(crc32cd, 3, 1);
+#define _ASM_SET_CRC ""
+#else /* !TOOLCHAIN_SUPPORTS_CRC */
+#define _ASM_SET_CRC ".set\tcrc\n\t"
+#endif
+
+#define _CRC32(crc, value, size, type)         \
+do {                                           \
+       __asm__ __volatile__(                   \
+               ".set   push\n\t"               \
+               _ASM_SET_CRC                    \
+               #type #size "   %0, %1, %0\n\t" \
+               ".set   pop"                    \
+               : "+r" (crc)                    \
+               : "r" (value));                 \
+} while (0)
+
+#define CRC32(crc, value, size) \
+       _CRC32(crc, value, size, crc32)
+
+#define CRC32C(crc, value, size) \
+       _CRC32(crc, value, size, crc32c)
+
+static u32 crc32_mips_le_hw(u32 crc_, const u8 *p, unsigned int len)
+{
+       u32 crc = crc_;
+
+#ifdef CONFIG_64BIT
+       while (len >= sizeof(u64)) {
+               u64 value = get_unaligned_le64(p);
+
+               CRC32(crc, value, d);
+               p += sizeof(u64);
+               len -= sizeof(u64);
+       }
+
+       if (len & sizeof(u32)) {
+#else /* !CONFIG_64BIT */
+       while (len >= sizeof(u32)) {
+#endif
+               u32 value = get_unaligned_le32(p);
+
+               CRC32(crc, value, w);
+               p += sizeof(u32);
+               len -= sizeof(u32);
+       }
+
+       if (len & sizeof(u16)) {
+               u16 value = get_unaligned_le16(p);
+
+               CRC32(crc, value, h);
+               p += sizeof(u16);
+       }
+
+       if (len & sizeof(u8)) {
+               u8 value = *p++;
+
+               CRC32(crc, value, b);
+       }
+
+       return crc;
+}
+
+static u32 crc32c_mips_le_hw(u32 crc_, const u8 *p, unsigned int len)
+{
+       u32 crc = crc_;
+
+#ifdef CONFIG_64BIT
+       while (len >= sizeof(u64)) {
+               u64 value = get_unaligned_le64(p);
+
+               CRC32C(crc, value, d);
+               p += sizeof(u64);
+               len -= sizeof(u64);
+       }
+
+       if (len & sizeof(u32)) {
+#else /* !CONFIG_64BIT */
+       while (len >= sizeof(u32)) {
+#endif
+               u32 value = get_unaligned_le32(p);
+
+               CRC32C(crc, value, w);
+               p += sizeof(u32);
+               len -= sizeof(u32);
+       }
+
+       if (len & sizeof(u16)) {
+               u16 value = get_unaligned_le16(p);
+
+               CRC32C(crc, value, h);
+               p += sizeof(u16);
+       }
+
+       if (len & sizeof(u8)) {
+               u8 value = *p++;
+
+               CRC32C(crc, value, b);
+       }
+       return crc;
+}
+
+#define CHKSUM_BLOCK_SIZE      1
+#define CHKSUM_DIGEST_SIZE     4
+
+struct chksum_ctx {
+       u32 key;
+};
+
+struct chksum_desc_ctx {
+       u32 crc;
+};
+
+static int chksum_init(struct shash_desc *desc)
+{
+       struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+       ctx->crc = mctx->key;
+
+       return 0;
+}
+
+/*
+ * Setting the seed allows arbitrary accumulators and flexible XOR policy
+ * If your algorithm starts with ~0, then XOR with ~0 before you set
+ * the seed.
+ */
+static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
+                        unsigned int keylen)
+{
+       struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
+
+       if (keylen != sizeof(mctx->key)) {
+               crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+       mctx->key = get_unaligned_le32(key);
+       return 0;
+}
+
+static int chksum_update(struct shash_desc *desc, const u8 *data,
+                        unsigned int length)
+{
+       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+       ctx->crc = crc32_mips_le_hw(ctx->crc, data, length);
+       return 0;
+}
+
+static int chksumc_update(struct shash_desc *desc, const u8 *data,
+                        unsigned int length)
+{
+       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+       ctx->crc = crc32c_mips_le_hw(ctx->crc, data, length);
+       return 0;
+}
+
+static int chksum_final(struct shash_desc *desc, u8 *out)
+{
+       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+       put_unaligned_le32(ctx->crc, out);
+       return 0;
+}
+
+static int chksumc_final(struct shash_desc *desc, u8 *out)
+{
+       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+       put_unaligned_le32(~ctx->crc, out);
+       return 0;
+}
+
+static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
+{
+       put_unaligned_le32(crc32_mips_le_hw(crc, data, len), out);
+       return 0;
+}
+
+static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
+{
+       put_unaligned_le32(~crc32c_mips_le_hw(crc, data, len), out);
+       return 0;
+}
+
+static int chksum_finup(struct shash_desc *desc, const u8 *data,
+                       unsigned int len, u8 *out)
+{
+       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+       return __chksum_finup(ctx->crc, data, len, out);
+}
+
+static int chksumc_finup(struct shash_desc *desc, const u8 *data,
+                       unsigned int len, u8 *out)
+{
+       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+       return __chksumc_finup(ctx->crc, data, len, out);
+}
+
+static int chksum_digest(struct shash_desc *desc, const u8 *data,
+                        unsigned int length, u8 *out)
+{
+       struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+
+       return __chksum_finup(mctx->key, data, length, out);
+}
+
+static int chksumc_digest(struct shash_desc *desc, const u8 *data,
+                        unsigned int length, u8 *out)
+{
+       struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+
+       return __chksumc_finup(mctx->key, data, length, out);
+}
+
+static int chksum_cra_init(struct crypto_tfm *tfm)
+{
+       struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+
+       mctx->key = ~0;
+       return 0;
+}
+
+static struct shash_alg crc32_alg = {
+       .digestsize             =       CHKSUM_DIGEST_SIZE,
+       .setkey                 =       chksum_setkey,
+       .init                   =       chksum_init,
+       .update                 =       chksum_update,
+       .final                  =       chksum_final,
+       .finup                  =       chksum_finup,
+       .digest                 =       chksum_digest,
+       .descsize               =       sizeof(struct chksum_desc_ctx),
+       .base                   =       {
+               .cra_name               =       "crc32",
+               .cra_driver_name        =       "crc32-mips-hw",
+               .cra_priority           =       300,
+               .cra_flags              =       CRYPTO_ALG_OPTIONAL_KEY,
+               .cra_blocksize          =       CHKSUM_BLOCK_SIZE,
+               .cra_alignmask          =       0,
+               .cra_ctxsize            =       sizeof(struct chksum_ctx),
+               .cra_module             =       THIS_MODULE,
+               .cra_init               =       chksum_cra_init,
+       }
+};
+
+static struct shash_alg crc32c_alg = {
+       .digestsize             =       CHKSUM_DIGEST_SIZE,
+       .setkey                 =       chksum_setkey,
+       .init                   =       chksum_init,
+       .update                 =       chksumc_update,
+       .final                  =       chksumc_final,
+       .finup                  =       chksumc_finup,
+       .digest                 =       chksumc_digest,
+       .descsize               =       sizeof(struct chksum_desc_ctx),
+       .base                   =       {
+               .cra_name               =       "crc32c",
+               .cra_driver_name        =       "crc32c-mips-hw",
+               .cra_priority           =       300,
+               .cra_flags              =       CRYPTO_ALG_OPTIONAL_KEY,
+               .cra_blocksize          =       CHKSUM_BLOCK_SIZE,
+               .cra_alignmask          =       0,
+               .cra_ctxsize            =       sizeof(struct chksum_ctx),
+               .cra_module             =       THIS_MODULE,
+               .cra_init               =       chksum_cra_init,
+       }
+};
+
+static int __init crc32_mod_init(void)
+{
+       int err;
+
+       err = crypto_register_shash(&crc32_alg);
+
+       if (err)
+               return err;
+
+       err = crypto_register_shash(&crc32c_alg);
+
+       if (err) {
+               crypto_unregister_shash(&crc32_alg);
+               return err;
+       }
+
+       return 0;
+}
+
+static void __exit crc32_mod_exit(void)
+{
+       crypto_unregister_shash(&crc32_alg);
+       crypto_unregister_shash(&crc32c_alg);
+}
+
+MODULE_AUTHOR("Marcin Nowakowski <marcin.nowakowski@mips.com>");
+MODULE_DESCRIPTION("CRC32 and CRC32C using optional MIPS instructions");
+MODULE_LICENSE("GPL v2");
+
+module_cpu_feature_match(MIPS_CRC32, crc32_mod_init);
+module_exit(crc32_mod_exit);
index 2ff3b17bfab18d829d74a1cb8c0e3fb790789d1b..ba9b2c8cce6837dcb2bc5e84b07c6f5dcda6c297 100644 (file)
@@ -27,6 +27,22 @@ config LEGACY_BOARD_SEAD3
          Enable this to include support for booting on MIPS SEAD-3 FPGA-based
          development boards, which boot using a legacy boot protocol.
 
+comment "MSCC Ocelot doesn't work with SEAD3 enabled"
+       depends on LEGACY_BOARD_SEAD3
+
+config LEGACY_BOARD_OCELOT
+       bool "Support MSCC Ocelot boards"
+       depends on LEGACY_BOARD_SEAD3=n
+       select LEGACY_BOARDS
+       select MSCC_OCELOT
+
+config MSCC_OCELOT
+       bool
+       select GPIOLIB
+       select MSCC_OCELOT_IRQ
+       select SYS_HAS_EARLY_PRINTK
+       select USE_GENERIC_EARLY_PRINTK_8250
+
 comment "FIT/UHI Boards"
 
 config FIT_IMAGE_FDT_BOSTON
index 5c31e0c4697dd69a88cde68eee45c89c331ec0db..d03a36f869a4ec978e30afb113b9d65e828d2831 100644 (file)
@@ -14,5 +14,6 @@ obj-y += proc.o
 
 obj-$(CONFIG_YAMON_DT_SHIM)            += yamon-dt.o
 obj-$(CONFIG_LEGACY_BOARD_SEAD3)       += board-sead3.o
+obj-$(CONFIG_LEGACY_BOARD_OCELOT)      += board-ocelot.o
 obj-$(CONFIG_KEXEC)                    += kexec.o
 obj-$(CONFIG_VIRT_BOARD_RANCHU)                += board-ranchu.o
diff --git a/arch/mips/generic/board-ocelot.c b/arch/mips/generic/board-ocelot.c
new file mode 100644 (file)
index 0000000..06d92fb
--- /dev/null
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Microsemi MIPS SoC support
+ *
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+#include <asm/machine.h>
+#include <asm/prom.h>
+
+#define DEVCPU_GCB_CHIP_REGS_CHIP_ID   0x71070000
+#define CHIP_ID_PART_ID                        GENMASK(27, 12)
+
+#define OCELOT_PART_ID                 (0x7514 << 12)
+
+#define UART_UART                      0x70100000
+
+static __init bool ocelot_detect(void)
+{
+       u32 rev;
+       int idx;
+
+       /* Look for the TLB entry set up by redboot before trying to use it */
+       write_c0_entryhi(DEVCPU_GCB_CHIP_REGS_CHIP_ID);
+       mtc0_tlbw_hazard();
+       tlb_probe();
+       tlb_probe_hazard();
+       idx = read_c0_index();
+       if (idx < 0)
+               return 0;
+
+       /* A TLB entry exists, let's assume it's usable and check the CHIP ID */
+       rev = __raw_readl((void __iomem *)DEVCPU_GCB_CHIP_REGS_CHIP_ID);
+
+       if ((rev & CHIP_ID_PART_ID) != OCELOT_PART_ID)
+               return 0;
+
+       /* Copy command line from bootloader early for Initrd detection */
+       if (fw_arg0 < 10 && (fw_arg1 & 0xFFF00000) == 0x80000000) {
+               unsigned int prom_argc = fw_arg0;
+               const char **prom_argv = (const char **)fw_arg1;
+
+               if (prom_argc > 1 && strlen(prom_argv[1]) > 0)
+                       /* ignore all built-in args if any f/w args given */
+                       strcpy(arcs_cmdline, prom_argv[1]);
+       }
+
+       return 1;
+}
+
+static void __init ocelot_earlyprintk_init(void)
+{
+       void __iomem *uart_base;
+
+       uart_base = ioremap_nocache(UART_UART, 0x20);
+       setup_8250_early_printk_port((unsigned long)uart_base, 2, 50000);
+}
+
+static void __init ocelot_late_init(void)
+{
+       ocelot_earlyprintk_init();
+}
+
+static __init const void *ocelot_fixup_fdt(const void *fdt,
+                                          const void *match_data)
+{
+       /* This has to be done so late because ioremap needs to work */
+       late_time_init = ocelot_late_init;
+
+       return fdt;
+}
+
+extern char __dtb_ocelot_pcb123_begin[];
+
+MIPS_MACHINE(ocelot) = {
+       .fdt = __dtb_ocelot_pcb123_begin,
+       .fixup_fdt = ocelot_fixup_fdt,
+       .detect = ocelot_detect,
+};
index 721b698bfe3cf7e0274bb9f0bb4bf58bfecef29e..5f74590e0bea45f526b778efd46d002c113417f1 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <asm/cpu.h>
 #include <asm/cpu-info.h>
+#include <asm/isa-rev.h>
 #include <cpu-feature-overrides.h>
 
 /*
 # define cpu_has_perf          (cpu_data[0].options & MIPS_CPU_PERF)
 #endif
 
-#if defined(CONFIG_SMP) && defined(__mips_isa_rev) && (__mips_isa_rev >= 6)
+#if defined(CONFIG_SMP) && (MIPS_ISA_REV >= 6)
 /*
  * Some systems share FTLB RAMs between threads within a core (siblings in
  * kernel parlance). This means that FTLB entries may become invalid at almost
 #  define cpu_has_shared_ftlb_entries \
        (current_cpu_data.options & MIPS_CPU_SHARED_FTLB_ENTRIES)
 # endif
-#endif /* SMP && __mips_isa_rev >= 6 */
+#endif /* SMP && MIPS_ISA_REV >= 6 */
 
 #ifndef cpu_has_shared_ftlb_ram
 # define cpu_has_shared_ftlb_ram 0
diff --git a/arch/mips/include/asm/isa-rev.h b/arch/mips/include/asm/isa-rev.h
new file mode 100644 (file)
index 0000000..683ea34
--- /dev/null
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 MIPS Tech, LLC
+ * Author: Matt Redfearn <matt.redfearn@mips.com>
+ */
+
+#ifndef __MIPS_ASM_ISA_REV_H__
+#define __MIPS_ASM_ISA_REV_H__
+
+/*
+ * The ISA revision level. This is 0 for MIPS I to V and N for
+ * MIPS{32,64}rN.
+ */
+
+/* If the compiler has defined __mips_isa_rev, believe it. */
+#ifdef __mips_isa_rev
+#define MIPS_ISA_REV __mips_isa_rev
+#else
+/* The compiler hasn't defined the isa rev so assume it's MIPS I - V (0) */
+#define MIPS_ISA_REV 0
+#endif
+
+
+#endif /* __MIPS_ASM_ISA_REV_H__ */
index aa3800c823321e50c577d78a7d9c8b63004d46f4..d99ca862dae32babbe68d039572a8e0f455ef5f0 100644 (file)
 #define AR71XX_AHB_DIV_MASK            0x7
 
 #define AR724X_PLL_REG_CPU_CONFIG      0x00
-#define AR724X_PLL_REG_PCIE_CONFIG     0x18
+#define AR724X_PLL_REG_PCIE_CONFIG     0x10
 
 #define AR724X_PLL_FB_SHIFT            0
 #define AR724X_PLL_FB_MASK             0x3ff
index 858752dac3373475234063ba8940013bc5f97cae..f65859784a4c1f41b70050b2d35a07bafade6535 100644 (file)
 #define MIPS_CONF5_FRE         (_ULCAST_(1) << 8)
 #define MIPS_CONF5_UFE         (_ULCAST_(1) << 9)
 #define MIPS_CONF5_CA2         (_ULCAST_(1) << 14)
+#define MIPS_CONF5_CRCP                (_ULCAST_(1) << 18)
 #define MIPS_CONF5_MSAEN       (_ULCAST_(1) << 27)
 #define MIPS_CONF5_EVA         (_ULCAST_(1) << 28)
 #define MIPS_CONF5_CV          (_ULCAST_(1) << 29)
index 600ad8fd683565c5b756c4654cb1eba4ed346af8..a2aba4b059e63ff535ac3f46d35efa67ef258bf0 100644 (file)
@@ -5,5 +5,6 @@
 /* HWCAP flags */
 #define HWCAP_MIPS_R6          (1 << 0)
 #define HWCAP_MIPS_MSA         (1 << 1)
+#define HWCAP_MIPS_CRC32       (1 << 2)
 
 #endif /* _UAPI_ASM_HWCAP_H */
index 606e02ca4b6cf8e4affffad5b5dff94bee2921d6..3035ca499cd8a5e6e54e10876de3abea6aecd522 100644 (file)
@@ -50,6 +50,7 @@
 #define MAP_NONBLOCK   0x20000         /* do not block on IO */
 #define MAP_STACK      0x40000         /* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB    0x80000         /* create a huge page mapping */
+#define MAP_FIXED_NOREPLACE 0x100000   /* MAP_FIXED which doesn't unmap underlying mapping */
 
 /*
  * Flags for msync
index cf3fd549e16d05a6e666ee8d96e4bde06878c650..6b07b739f914d06ebb6fc8777e4b3fb2be914547 100644 (file)
@@ -848,6 +848,9 @@ static inline unsigned int decode_config5(struct cpuinfo_mips *c)
        if (config5 & MIPS_CONF5_CA2)
                c->ases |= MIPS_ASE_MIPS16E2;
 
+       if (config5 & MIPS_CONF5_CRCP)
+               elf_hwcap |= HWCAP_MIPS_CRC32;
+
        return config5 & MIPS_CONF_M;
 }
 
index 421e06dfee728a973452c11e8b51a128efaae032..55c3fbeb2df628e73a26de53a9b5622c7a785a0e 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
+#include <linux/suspend.h>
 
 #include <asm/asm-offsets.h>
 #include <asm/cacheflush.h>
@@ -670,6 +671,34 @@ static int cps_pm_online_cpu(unsigned int cpu)
        return 0;
 }
 
+static int cps_pm_power_notifier(struct notifier_block *this,
+                                unsigned long event, void *ptr)
+{
+       unsigned int stat;
+
+       switch (event) {
+       case PM_SUSPEND_PREPARE:
+               stat = read_cpc_cl_stat_conf();
+               /*
+                * If we're attempting to suspend the system and power down all
+                * of the cores, the JTAG detect bit indicates that the CPC will
+                * instead put the cores into clock-off state. In this state
+                * a connected debugger can cause the CPU to attempt
+                * interactions with the powered down system. At best this will
+                * fail. At worst, it can hang the NoC, requiring a hard reset.
+                * To avoid this, just block system suspend if a JTAG probe
+                * is detected.
+                */
+               if (stat & CPC_Cx_STAT_CONF_EJTAG_PROBE) {
+                       pr_warn("JTAG probe is connected - abort suspend\n");
+                       return NOTIFY_BAD;
+               }
+               return NOTIFY_DONE;
+       default:
+               return NOTIFY_DONE;
+       }
+}
+
 static int __init cps_pm_init(void)
 {
        /* A CM is required for all non-coherent states */
@@ -705,6 +734,8 @@ static int __init cps_pm_init(void)
                pr_warn("pm-cps: no CPC, clock & power gating unavailable\n");
        }
 
+       pm_notifier(cps_pm_power_notifier, 0);
+
        return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mips/cps_pm:online",
                                 cps_pm_online_cpu, NULL);
 }
index 7c746d3458e71a93594760866f3933e6351bcc5a..6288780b779e7c451f19f9160e0823ac1decb2ff 100644 (file)
@@ -13,6 +13,9 @@
 #include <linux/reboot.h>
 #include <linux/delay.h>
 
+#include <asm/compiler.h>
+#include <asm/idle.h>
+#include <asm/mipsregs.h>
 #include <asm/reboot.h>
 
 /*
@@ -26,6 +29,62 @@ void (*pm_power_off)(void);
 
 EXPORT_SYMBOL(pm_power_off);
 
+static void machine_hang(void)
+{
+       /*
+        * We're hanging the system so we don't want to be interrupted anymore.
+        * Any interrupt handlers that ran would at best be useless & at worst
+        * go awry because the system isn't in a functional state.
+        */
+       local_irq_disable();
+
+       /*
+        * Mask all interrupts, giving us a better chance of remaining in the
+        * low power wait state.
+        */
+       clear_c0_status(ST0_IM);
+
+       while (true) {
+               if (cpu_has_mips_r) {
+                       /*
+                        * We know that the wait instruction is supported so
+                        * make use of it directly, leaving interrupts
+                        * disabled.
+                        */
+                       asm volatile(
+                               ".set   push\n\t"
+                               ".set   " MIPS_ISA_ARCH_LEVEL "\n\t"
+                               "wait\n\t"
+                               ".set   pop");
+               } else if (cpu_wait) {
+                       /*
+                        * Try the cpu_wait() callback. This isn't ideal since
+                        * it'll re-enable interrupts, but that ought to be
+                        * harmless given that they're all masked.
+                        */
+                       cpu_wait();
+                       local_irq_disable();
+               } else {
+                       /*
+                        * We're going to burn some power running round the
+                        * loop, but we don't really have a choice. This isn't
+                        * a path we should expect to run for long during
+                        * typical use anyway.
+                        */
+               }
+
+               /*
+                * In most modern MIPS CPUs interrupts will cause the wait
+                * instruction to graduate even when disabled, and in some
+                * cases even when masked. In order to prevent a timer
+                * interrupt from continuously taking us out of the low power
+                * wait state, we clear any pending timer interrupt here.
+                */
+               if (cpu_has_counter)
+                       write_c0_compare(0);
+       }
+}
+
 void machine_restart(char *command)
 {
        if (_machine_restart)
@@ -38,8 +97,7 @@ void machine_restart(char *command)
        do_kernel_restart(command);
        mdelay(1000);
        pr_emerg("Reboot failed -- System halted\n");
-       local_irq_disable();
-       while (1);
+       machine_hang();
 }
 
 void machine_halt(void)
@@ -51,8 +109,7 @@ void machine_halt(void)
        preempt_disable();
        smp_send_stop();
 #endif
-       local_irq_disable();
-       while (1);
+       machine_hang();
 }
 
 void machine_power_off(void)
@@ -64,6 +121,5 @@ void machine_power_off(void)
        preempt_disable();
        smp_send_stop();
 #endif
-       local_irq_disable();
-       while (1);
+       machine_hang();
 }
index 5f8b0a9e30b3d6faec9befca1e759a8f9263f8c8..563188ac6fa264ab930cab5a6040d173e21b542c 100644 (file)
@@ -155,7 +155,8 @@ void __init detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_add
        add_memory_region(start, size, BOOT_MEM_RAM);
 }
 
-bool __init memory_region_available(phys_addr_t start, phys_addr_t size)
+static bool __init __maybe_unused memory_region_available(phys_addr_t start,
+                                                         phys_addr_t size)
 {
        int i;
        bool in_ram = false, free = true;
@@ -453,7 +454,7 @@ static void __init bootmem_init(void)
                pr_info("Wasting %lu bytes for tracking %lu unused pages\n",
                        (min_low_pfn - ARCH_PFN_OFFSET) * sizeof(struct page),
                        min_low_pfn - ARCH_PFN_OFFSET);
-       } else if (min_low_pfn < ARCH_PFN_OFFSET) {
+       } else if (ARCH_PFN_OFFSET - min_low_pfn > 0UL) {
                pr_info("%lu free pages won't be used\n",
                        ARCH_PFN_OFFSET - min_low_pfn);
        }
index 84b7b592b834dcd58e762a74ce552672e703e4ee..400676ce03f429ab304cf7a89732b6193e1e5631 100644 (file)
@@ -30,7 +30,6 @@
 #include <linux/hardirq.h>
 #include <linux/gfp.h>
 #include <linux/kcore.h>
-#include <linux/export.h>
 #include <linux/initrd.h>
 
 #include <asm/asm-offsets.h>
@@ -46,7 +45,6 @@
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/fixmap.h>
-#include <asm/maar.h>
 
 /*
  * We have up to 8 empty zeroed pages so we can map one of the right colour
index 33d3251ecd37a257c2cb435a973e612738e0a216..2f616ebeb7e0cff264a7d399a8c341c694e8c003 100644 (file)
@@ -24,20 +24,20 @@ EXPORT_SYMBOL(shm_align_mask);
 #define MIN_GAP (128*1024*1024UL)
 #define MAX_GAP ((TASK_SIZE)/6*5)
 
-static int mmap_is_legacy(void)
+static int mmap_is_legacy(struct rlimit *rlim_stack)
 {
        if (current->personality & ADDR_COMPAT_LAYOUT)
                return 1;
 
-       if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+       if (rlim_stack->rlim_cur == RLIM_INFINITY)
                return 1;
 
        return sysctl_legacy_va_layout;
 }
 
-static unsigned long mmap_base(unsigned long rnd)
+static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
 {
-       unsigned long gap = rlimit(RLIMIT_STACK);
+       unsigned long gap = rlim_stack->rlim_cur;
 
        if (gap < MIN_GAP)
                gap = MIN_GAP;
@@ -158,18 +158,18 @@ unsigned long arch_mmap_rnd(void)
        return rnd << PAGE_SHIFT;
 }
 
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
        unsigned long random_factor = 0UL;
 
        if (current->flags & PF_RANDOMIZE)
                random_factor = arch_mmap_rnd();
 
-       if (mmap_is_legacy()) {
+       if (mmap_is_legacy(rlim_stack)) {
                mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
                mm->get_unmapped_area = arch_get_unmapped_area;
        } else {
-               mm->mmap_base = mmap_base(random_factor);
+               mm->mmap_base = mmap_base(random_factor, rlim_stack);
                mm->get_unmapped_area = arch_get_unmapped_area_topdown;
        }
 }
index 88a2075305d1c1d71d09c7e3860367609b81ca04..57154c5883b6f810dd26eab3f981d712dd1ee4d0 100644 (file)
@@ -11,6 +11,7 @@
  */
 
 #include <asm/asm.h>
+#include <asm/isa-rev.h>
 #include <asm/regdef.h>
 #include "bpf_jit.h"
 
@@ -65,7 +66,7 @@ FEXPORT(sk_load_word_positive)
        lw      $r_A, 0(t1)
        .set    noreorder
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+# if MIPS_ISA_REV >= 2
        wsbh    t0, $r_A
        rotr    $r_A, t0, 16
 # else
@@ -92,7 +93,7 @@ FEXPORT(sk_load_half_positive)
        PTR_ADDU t1, $r_skb_data, offset
        lhu     $r_A, 0(t1)
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+# if MIPS_ISA_REV >= 2
        wsbh    $r_A, $r_A
 # else
        sll     t0, $r_A, 8
@@ -170,7 +171,7 @@ FEXPORT(sk_load_byte_positive)
 NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp)
        bpf_slow_path_common(4)
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+# if MIPS_ISA_REV >= 2
        wsbh    t0, $r_s0
        jr      $r_ra
         rotr   $r_A, t0, 16
@@ -196,7 +197,7 @@ NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp)
 NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp)
        bpf_slow_path_common(2)
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+# if MIPS_ISA_REV >= 2
        jr      $r_ra
         wsbh   $r_A, $r_s0
 # else
index 407f155f0bb6f5d5fa7e78f5aa611085843653d8..f6b77788124af28e5871b4c00c24576672e23084 100644 (file)
@@ -315,6 +315,7 @@ static int mt7620_pci_probe(struct platform_device *pdev)
                break;
 
        case MT762X_SOC_MT7628AN:
+       case MT762X_SOC_MT7688:
                if (mt7628_pci_hw_init(pdev))
                        return -1;
                break;
index f5b367e20dff1cfa1f0a1b3f3a1e86f04f12dc6e..31955c1d55550fd43ccc7f310687f6b6a67bdac9 100644 (file)
@@ -319,7 +319,7 @@ static void __init rbtx4927_mtd_init(void)
 
 static void __init rbtx4927_gpioled_init(void)
 {
-       static struct gpio_led leds[] = {
+       static const struct gpio_led leds[] = {
                { .name = "gpioled:green:0", .gpio = 0, .active_low = 1, },
                { .name = "gpioled:green:1", .gpio = 1, .active_low = 1, },
        };
index be37bbb1f061427d2f786395d491bf83886edd45..428a1917afc6b4516ed40e25faec50595f2afbd8 100644 (file)
@@ -10,6 +10,8 @@
 
 #include "vdso.h"
 
+#include <asm/isa-rev.h>
+
 #include <linux/elfnote.h>
 #include <linux/version.h>
 
@@ -40,11 +42,7 @@ __mips_abiflags:
        .byte   __mips          /* isa_level */
 
        /* isa_rev */
-#ifdef __mips_isa_rev
-       .byte   __mips_isa_rev
-#else
-       .byte   0
-#endif
+       .byte   MIPS_ISA_REV
 
        /* gpr_size */
 #ifdef __mips64
@@ -54,7 +52,7 @@ __mips_abiflags:
 #endif
 
        /* cpr1_size */
-#if (defined(__mips_isa_rev) && __mips_isa_rev >= 6) || defined(__mips64)
+#if (MIPS_ISA_REV >= 6) || defined(__mips64)
        .byte   2               /* AFL_REG_64 */
 #else
        .byte   1               /* AFL_REG_32 */
index 7b9b20a381cbc1f9852d055fc09b4f4e316288f1..1240f148ec0f340480b079cf65561699ec1ca501 100644 (file)
@@ -34,8 +34,8 @@ void flush_anon_page(struct vm_area_struct *vma,
 void flush_kernel_dcache_page(struct page *page);
 void flush_icache_range(unsigned long start, unsigned long end);
 void flush_icache_page(struct vm_area_struct *vma, struct page *page);
-#define flush_dcache_mmap_lock(mapping)   spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping)   xa_lock_irq(&(mapping)->i_pages)
+#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&(mapping)->i_pages)
 
 #else
 #include <asm-generic/cacheflush.h>
index 55e383c173f776f35826d4d5d70b5af41f552a46..18eb9f69f8063b830e3cb3fa6060c7e6cebb4b66 100644 (file)
@@ -46,9 +46,7 @@ extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
 extern void flush_dcache_range(unsigned long start, unsigned long end);
 extern void invalidate_dcache_range(unsigned long start, unsigned long end);
 
-#define flush_dcache_mmap_lock(mapping) \
-       spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
-       spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping)                xa_lock_irq(&mapping->i_pages)
+#define flush_dcache_mmap_unlock(mapping)      xa_unlock_irq(&mapping->i_pages)
 
 #endif /* _ASM_NIOS2_CACHEFLUSH_H */
index 20e86209ef2e03646b49dd1f00324dae0ecedf72..ab88b6dd467911dd72a092328737d9599a55e9d3 100644 (file)
@@ -336,9 +336,9 @@ static int __init nios2_time_init(struct device_node *timer)
        return ret;
 }
 
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
-       ts->tv_sec = mktime(2007, 1, 1, 0, 0, 0);
+       ts->tv_sec = mktime64(2007, 1, 1, 0, 0, 0);
        ts->tv_nsec = 0;
 }
 
index bd5ce31936f5b196c3ce0482ee962eb93df46216..0c83644bfa5cbb3624b7d33b3fc70cf5d78f4a00 100644 (file)
@@ -55,10 +55,8 @@ void invalidate_kernel_vmap_range(void *vaddr, int size);
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *page);
 
-#define flush_dcache_mmap_lock(mapping) \
-       spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
-       spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping)                xa_lock_irq(&mapping->i_pages)
+#define flush_dcache_mmap_unlock(mapping)      xa_unlock_irq(&mapping->i_pages)
 
 #define flush_icache_page(vma,page)    do {            \
        flush_kernel_dcache_page(page);                 \
index a056a642bb312924c63ae79ca919073cc984020d..870fbf8c708811bca3bbbbffea8b21afa55f17b6 100644 (file)
@@ -26,6 +26,7 @@
 #define MAP_NONBLOCK   0x20000         /* do not block on IO */
 #define MAP_STACK      0x40000         /* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB    0x80000         /* create a huge page mapping */
+#define MAP_FIXED_NOREPLACE 0x100000   /* MAP_FIXED which doesn't unmap underlying mapping */
 
 #define MS_SYNC                1               /* synchronous memory sync */
 #define MS_ASYNC       2               /* sync memory asynchronously */
index 8c99ebbe2bac7b73725173736b77479583345c8b..43b308cfdf532264d73e64ba5037950dd5a24fcb 100644 (file)
@@ -70,12 +70,18 @@ static inline unsigned long COLOR_ALIGN(unsigned long addr,
  * Top of mmap area (just below the process stack).
  */
 
-static unsigned long mmap_upper_limit(void)
+/*
+ * When called from arch_get_unmapped_area(), rlim_stack will be NULL,
+ * indicating that "current" should be used instead of a passed-in
+ * value from the exec bprm as done with arch_pick_mmap_layout().
+ */
+static unsigned long mmap_upper_limit(struct rlimit *rlim_stack)
 {
        unsigned long stack_base;
 
        /* Limit stack size - see setup_arg_pages() in fs/exec.c */
-       stack_base = rlimit_max(RLIMIT_STACK);
+       stack_base = rlim_stack ? rlim_stack->rlim_max
+                               : rlimit_max(RLIMIT_STACK);
        if (stack_base > STACK_SIZE_MAX)
                stack_base = STACK_SIZE_MAX;
 
@@ -127,7 +133,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
        info.flags = 0;
        info.length = len;
        info.low_limit = mm->mmap_legacy_base;
-       info.high_limit = mmap_upper_limit();
+       info.high_limit = mmap_upper_limit(NULL);
        info.align_mask = last_mmap ? (PAGE_MASK & (SHM_COLOUR - 1)) : 0;
        info.align_offset = shared_align_offset(last_mmap, pgoff);
        addr = vm_unmapped_area(&info);
@@ -250,10 +256,10 @@ static unsigned long mmap_legacy_base(void)
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
  */
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
        mm->mmap_legacy_base = mmap_legacy_base();
-       mm->mmap_base = mmap_upper_limit();
+       mm->mmap_base = mmap_upper_limit(rlim_stack);
 
        if (mmap_is_legacy()) {
                mm->mmap_base = mm->mmap_legacy_base;
index f7e684560186f9c3d5db133b8e66281c0f3c0e12..c3830400ca28ef1c37f7e9909b1bd7cc011c27af 100644 (file)
@@ -174,7 +174,7 @@ static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
 
        /* we treat tod_sec as unsigned, so this can work until year 2106 */
        rtc_time64_to_tm(tod_data.tod_sec, tm);
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
index d503f344e476e497912def22e544d56a88b65567..b24ce40acd475bc304084789897e8308651fdb42 100644 (file)
 #define MIN_GAP (128*1024*1024)
 #define MAX_GAP (TASK_SIZE/6*5)
 
-static inline int mmap_is_legacy(void)
+static inline int mmap_is_legacy(struct rlimit *rlim_stack)
 {
        if (current->personality & ADDR_COMPAT_LAYOUT)
                return 1;
 
-       if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+       if (rlim_stack->rlim_cur == RLIM_INFINITY)
                return 1;
 
        return sysctl_legacy_va_layout;
@@ -76,9 +76,10 @@ static inline unsigned long stack_maxrandom_size(void)
                return (1<<30);
 }
 
-static inline unsigned long mmap_base(unsigned long rnd)
+static inline unsigned long mmap_base(unsigned long rnd,
+                                     struct rlimit *rlim_stack)
 {
-       unsigned long gap = rlimit(RLIMIT_STACK);
+       unsigned long gap = rlim_stack->rlim_cur;
        unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
 
        /* Values close to RLIM_INFINITY can overflow. */
@@ -196,26 +197,28 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
 }
 
 static void radix__arch_pick_mmap_layout(struct mm_struct *mm,
-                                       unsigned long random_factor)
+                                       unsigned long random_factor,
+                                       struct rlimit *rlim_stack)
 {
-       if (mmap_is_legacy()) {
+       if (mmap_is_legacy(rlim_stack)) {
                mm->mmap_base = TASK_UNMAPPED_BASE;
                mm->get_unmapped_area = radix__arch_get_unmapped_area;
        } else {
-               mm->mmap_base = mmap_base(random_factor);
+               mm->mmap_base = mmap_base(random_factor, rlim_stack);
                mm->get_unmapped_area = radix__arch_get_unmapped_area_topdown;
        }
 }
 #else
 /* dummy */
 extern void radix__arch_pick_mmap_layout(struct mm_struct *mm,
-                                       unsigned long random_factor);
+                                       unsigned long random_factor,
+                                       struct rlimit *rlim_stack);
 #endif
 /*
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
  */
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
        unsigned long random_factor = 0UL;
 
@@ -223,16 +226,17 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
                random_factor = arch_mmap_rnd();
 
        if (radix_enabled())
-               return radix__arch_pick_mmap_layout(mm, random_factor);
+               return radix__arch_pick_mmap_layout(mm, random_factor,
+                                                   rlim_stack);
        /*
         * Fall back to the standard layout if the personality
         * bit is set, or if the expected stack growth is unlimited:
         */
-       if (mmap_is_legacy()) {
+       if (mmap_is_legacy(rlim_stack)) {
                mm->mmap_base = TASK_UNMAPPED_BASE;
                mm->get_unmapped_area = arch_get_unmapped_area;
        } else {
-               mm->mmap_base = mmap_base(random_factor);
+               mm->mmap_base = mmap_base(random_factor, rlim_stack);
                mm->get_unmapped_area = arch_get_unmapped_area_topdown;
        }
 }
index 9a8a084e4abac15355841189bd4d60efd3af03c3..4c615fcb0cf073bbdc8f746c0e54abdd1082e142 100644 (file)
@@ -75,8 +75,7 @@ EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
 /*
  * Taken from alloc_migrate_target with changes to remove CMA allocations
  */
-struct page *new_iommu_non_cma_page(struct page *page, unsigned long private,
-                                       int **resultp)
+struct page *new_iommu_non_cma_page(struct page *page, unsigned long private)
 {
        gfp_t gfp_mask = GFP_USER;
        struct page *new_page;
index 516e23de5a3d3fe0ecd28aba329e22c13db48eab..48fbb41af5d152c2508b2f631502bee8653207ad 100644 (file)
@@ -824,6 +824,9 @@ static int __init opal_init(void)
        /* Create i2c platform devices */
        opal_pdev_init("ibm,opal-i2c");
 
+       /* Handle non-volatile memory devices */
+       opal_pdev_init("pmem-region");
+
        /* Setup a heatbeat thread if requested by OPAL */
        opal_init_heartbeat();
 
index 831bdcf407bbc1d2d76edc78e6a9f50aae1406cb..0a7627cdb34e7f3676673b203a1c660a662965ec 100644 (file)
@@ -37,11 +37,11 @@ static unsigned long stack_maxrandom_size(void)
 #define MIN_GAP (32*1024*1024)
 #define MAX_GAP (STACK_TOP/6*5)
 
-static inline int mmap_is_legacy(void)
+static inline int mmap_is_legacy(struct rlimit *rlim_stack)
 {
        if (current->personality & ADDR_COMPAT_LAYOUT)
                return 1;
-       if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+       if (rlim_stack->rlim_cur == RLIM_INFINITY)
                return 1;
        return sysctl_legacy_va_layout;
 }
@@ -56,9 +56,10 @@ static unsigned long mmap_base_legacy(unsigned long rnd)
        return TASK_UNMAPPED_BASE + rnd;
 }
 
-static inline unsigned long mmap_base(unsigned long rnd)
+static inline unsigned long mmap_base(unsigned long rnd,
+                                     struct rlimit *rlim_stack)
 {
-       unsigned long gap = rlimit(RLIMIT_STACK);
+       unsigned long gap = rlim_stack->rlim_cur;
 
        if (gap < MIN_GAP)
                gap = MIN_GAP;
@@ -184,7 +185,7 @@ check_asce_limit:
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
  */
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
        unsigned long random_factor = 0UL;
 
@@ -195,11 +196,11 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
         * Fall back to the standard layout if the personality
         * bit is set, or if the expected stack growth is unlimited:
         */
-       if (mmap_is_legacy()) {
+       if (mmap_is_legacy(rlim_stack)) {
                mm->mmap_base = mmap_base_legacy(random_factor);
                mm->get_unmapped_area = arch_get_unmapped_area;
        } else {
-               mm->mmap_base = mmap_base(random_factor);
+               mm->mmap_base = mmap_base(random_factor, rlim_stack);
                mm->get_unmapped_area = arch_get_unmapped_area_topdown;
        }
 }
index 348a17ecdf66be45f6432ee829aa16e18cf9bea4..9ef8de63f28b10234f625c706d1f4f1e5162dfcc 100644 (file)
@@ -276,7 +276,7 @@ static unsigned long mmap_rnd(void)
        return rnd << PAGE_SHIFT;
 }
 
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
        unsigned long random_factor = mmap_rnd();
        unsigned long gap;
@@ -285,7 +285,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
         * Fall back to the standard layout if the personality
         * bit is set, or if the expected stack growth is unlimited:
         */
-       gap = rlimit(RLIMIT_STACK);
+       gap = rlim_stack->rlim_cur;
        if (!test_thread_flag(TIF_32BIT) ||
            (current->personality & ADDR_COMPAT_LAYOUT) ||
            gap == RLIM_INFINITY ||
index e871af24d9cd276617ea75f5895a4b7d4043af55..c390f3deb0dcd72adc3eecb3892e63a3a7c0093e 100644 (file)
@@ -109,6 +109,17 @@ config UML_NET_DAEMON
         more than one without conflict.  If you don't need UML networking,
         say N.
 
+config UML_NET_VECTOR
+       bool "Vector I/O high performance network devices"
+       depends on UML_NET
+       help
+       This User-Mode Linux network driver uses multi-message send
+       and receive functions. The host running the UML guest must have
+       a linux kernel version above 3.0 and a libc version > 2.13.
+       This driver provides tap, raw, gre and l2tpv3 network transports
+       with up to 4 times higher network throughput than the UML network
+       drivers.
+
 config UML_NET_VDE
        bool "VDE transport"
        depends on UML_NET
index e7582e1d248cc91796d94e2fcaa9a6f7b4ef62fc..16b3cebddafb97a2384829b61ce013b193bb54c6 100644 (file)
@@ -9,6 +9,7 @@
 slip-objs := slip_kern.o slip_user.o
 slirp-objs := slirp_kern.o slirp_user.o
 daemon-objs := daemon_kern.o daemon_user.o
+vector-objs := vector_kern.o vector_user.o vector_transports.o
 umcast-objs := umcast_kern.o umcast_user.o
 net-objs := net_kern.o net_user.o
 mconsole-objs := mconsole_kern.o mconsole_user.o
@@ -43,6 +44,7 @@ obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o
 obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o
 obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o
 obj-$(CONFIG_UML_NET_DAEMON) += daemon.o 
+obj-$(CONFIG_UML_NET_VECTOR) += vector.o
 obj-$(CONFIG_UML_NET_VDE) += vde.o
 obj-$(CONFIG_UML_NET_MCAST) += umcast.o
 obj-$(CONFIG_UML_NET_PCAP) += pcap.o
@@ -61,7 +63,7 @@ obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
 obj-$(CONFIG_UML_RANDOM) += random.o
 
 # pcap_user.o must be added explicitly.
-USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o
+USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
 CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH)
 
 include arch/um/scripts/Makefile.rules
index acbe6c67afbaa1efe09952b9c1987b17b5afa58b..05588f9466c7ff1a1b028cb710cab37b65b0d7ba 100644 (file)
@@ -171,56 +171,19 @@ int enable_chan(struct line *line)
        return err;
 }
 
-/* Items are added in IRQ context, when free_irq can't be called, and
- * removed in process context, when it can.
- * This handles interrupt sources which disappear, and which need to
- * be permanently disabled.  This is discovered in IRQ context, but
- * the freeing of the IRQ must be done later.
- */
-static DEFINE_SPINLOCK(irqs_to_free_lock);
-static LIST_HEAD(irqs_to_free);
-
-void free_irqs(void)
-{
-       struct chan *chan;
-       LIST_HEAD(list);
-       struct list_head *ele;
-       unsigned long flags;
-
-       spin_lock_irqsave(&irqs_to_free_lock, flags);
-       list_splice_init(&irqs_to_free, &list);
-       spin_unlock_irqrestore(&irqs_to_free_lock, flags);
-
-       list_for_each(ele, &list) {
-               chan = list_entry(ele, struct chan, free_list);
-
-               if (chan->input && chan->enabled)
-                       um_free_irq(chan->line->driver->read_irq, chan);
-               if (chan->output && chan->enabled)
-                       um_free_irq(chan->line->driver->write_irq, chan);
-               chan->enabled = 0;
-       }
-}
-
 static void close_one_chan(struct chan *chan, int delay_free_irq)
 {
-       unsigned long flags;
-
        if (!chan->opened)
                return;
 
-       if (delay_free_irq) {
-               spin_lock_irqsave(&irqs_to_free_lock, flags);
-               list_add(&chan->free_list, &irqs_to_free);
-               spin_unlock_irqrestore(&irqs_to_free_lock, flags);
-       }
-       else {
-               if (chan->input && chan->enabled)
-                       um_free_irq(chan->line->driver->read_irq, chan);
-               if (chan->output && chan->enabled)
-                       um_free_irq(chan->line->driver->write_irq, chan);
-               chan->enabled = 0;
-       }
+    /* we can safely call free now - it will be marked
+     *  as free and freed once the IRQ stopped processing
+     */
+       if (chan->input && chan->enabled)
+               um_free_irq(chan->line->driver->read_irq, chan);
+       if (chan->output && chan->enabled)
+               um_free_irq(chan->line->driver->write_irq, chan);
+       chan->enabled = 0;
        if (chan->ops->close != NULL)
                (*chan->ops->close)(chan->fd, chan->data);
 
index 366e57f5e8d635b59990e7dcc8d6b8b5b26fb75a..8d80b27502e6ae4feb235d01801a67be5ca8cce4 100644 (file)
@@ -284,7 +284,7 @@ int line_setup_irq(int fd, int input, int output, struct line *line, void *data)
        if (err)
                return err;
        if (output)
-               err = um_request_irq(driver->write_irq, fd, IRQ_WRITE,
+               err = um_request_irq(driver->write_irq, fd, IRQ_NONE,
                                     line_write_interrupt, IRQF_SHARED,
                                     driver->write_irq_name, data);
        return err;
index b305f8247909563551a9d4a9ba20c81c000e9722..3ef1b48e064a87dd33465ff083c604c4657b8bb2 100644 (file)
@@ -288,7 +288,7 @@ static void uml_net_user_timer_expire(struct timer_list *t)
 #endif
 }
 
-static void setup_etheraddr(struct net_device *dev, char *str)
+void uml_net_setup_etheraddr(struct net_device *dev, char *str)
 {
        unsigned char *addr = dev->dev_addr;
        char *end;
@@ -412,7 +412,7 @@ static void eth_configure(int n, void *init, char *mac,
         */
        snprintf(dev->name, sizeof(dev->name), "eth%d", n);
 
-       setup_etheraddr(dev, mac);
+       uml_net_setup_etheraddr(dev, mac);
 
        printk(KERN_INFO "Netdevice %d (%pM) : ", n, dev->dev_addr);
 
index 37c51a6be690c786b3966e1b41006e88630b7550..778a0e52d5a5c618fbeebf82679060ab3e7a6520 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/miscdevice.h>
 #include <linux/delay.h>
 #include <linux/uaccess.h>
+#include <init.h>
 #include <irq_kern.h>
 #include <os.h>
 
@@ -154,7 +155,14 @@ err_out_cleanup_hw:
 /*
  * rng_cleanup - shutdown RNG module
  */
-static void __exit rng_cleanup (void)
+
+static void cleanup(void)
+{
+       free_irq_by_fd(random_fd);
+       os_close_file(random_fd);
+}
+
+static void __exit rng_cleanup(void)
 {
        os_close_file(random_fd);
        misc_deregister (&rng_miscdev);
@@ -162,6 +170,7 @@ static void __exit rng_cleanup (void)
 
 module_init (rng_init);
 module_exit (rng_cleanup);
+__uml_exitcall(cleanup);
 
 MODULE_DESCRIPTION("UML Host Random Number Generator (RNG) driver");
 MODULE_LICENSE("GPL");
index b55fe9bf5d3e2859309cef0420fd9493a041000e..d4e8c497ae868a611edba78d21a798d7755c1c12 100644 (file)
@@ -1587,11 +1587,11 @@ int io_thread(void *arg)
 
                do {
                        res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n);
-                       if (res > 0) {
+                       if (res >= 0) {
                                written += res;
                        } else {
                                if (res != -EAGAIN) {
-                                       printk("io_thread - read failed, fd = %d, "
+                                       printk("io_thread - write failed, fd = %d, "
                                               "err = %d\n", kernel_fd, -n);
                                }
                        }
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
new file mode 100644 (file)
index 0000000..02168fe
--- /dev/null
@@ -0,0 +1,1633 @@
+/*
+ * Copyright (C) 2017 - Cambridge Greys Limited
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
+ * James Leu (jleu@mindspring.net).
+ * Copyright (C) 2001 by various other people who didn't put their name here.
+ * Licensed under the GPL.
+ */
+
+#include <linux/version.h>
+#include <linux/bootmem.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/inetdevice.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <init.h>
+#include <irq_kern.h>
+#include <irq_user.h>
+#include <net_kern.h>
+#include <os.h>
+#include "mconsole_kern.h"
+#include "vector_user.h"
+#include "vector_kern.h"
+
+/*
+ * Adapted from network devices with the following major changes:
+ * All transports are static - simplifies the code significantly
+ * Multiple FDs/IRQs per device
+ * Vector IO optionally used for read/write, falling back to legacy
+ * based on configuration and/or availability
+ * Configuration is no longer positional - L2TPv3 and GRE require up to
+ * 10 parameters, passing this as positional is not fit for purpose.
+ * Only socket transports are supported
+ */
+
+
+#define DRIVER_NAME "uml-vector"
+#define DRIVER_VERSION "01"
+struct vector_cmd_line_arg {
+       struct list_head list;
+       int unit;
+       char *arguments;
+};
+
+struct vector_device {
+       struct list_head list;
+       struct net_device *dev;
+       struct platform_device pdev;
+       int unit;
+       int opened;
+};
+
+static LIST_HEAD(vec_cmd_line);
+
+static DEFINE_SPINLOCK(vector_devices_lock);
+static LIST_HEAD(vector_devices);
+
+static int driver_registered;
+
+static void vector_eth_configure(int n, struct arglist *def);
+
+/* Argument accessors to set variables (and/or set default values)
+ * mtu, buffer sizing, default headroom, etc
+ */
+
+#define DEFAULT_HEADROOM 2
+#define SAFETY_MARGIN 32
+#define DEFAULT_VECTOR_SIZE 64
+#define TX_SMALL_PACKET 128
+#define MAX_IOV_SIZE (MAX_SKB_FRAGS + 1)
+
+static const struct {
+       const char string[ETH_GSTRING_LEN];
+} ethtool_stats_keys[] = {
+       { "rx_queue_max" },
+       { "rx_queue_running_average" },
+       { "tx_queue_max" },
+       { "tx_queue_running_average" },
+       { "rx_encaps_errors" },
+       { "tx_timeout_count" },
+       { "tx_restart_queue" },
+       { "tx_kicks" },
+       { "tx_flow_control_xon" },
+       { "tx_flow_control_xoff" },
+       { "rx_csum_offload_good" },
+       { "rx_csum_offload_errors"},
+       { "sg_ok"},
+       { "sg_linearized"},
+};
+
+#define VECTOR_NUM_STATS       ARRAY_SIZE(ethtool_stats_keys)
+
+static void vector_reset_stats(struct vector_private *vp)
+{
+       vp->estats.rx_queue_max = 0;
+       vp->estats.rx_queue_running_average = 0;
+       vp->estats.tx_queue_max = 0;
+       vp->estats.tx_queue_running_average = 0;
+       vp->estats.rx_encaps_errors = 0;
+       vp->estats.tx_timeout_count = 0;
+       vp->estats.tx_restart_queue = 0;
+       vp->estats.tx_kicks = 0;
+       vp->estats.tx_flow_control_xon = 0;
+       vp->estats.tx_flow_control_xoff = 0;
+       vp->estats.sg_ok = 0;
+       vp->estats.sg_linearized = 0;
+}
+
+static int get_mtu(struct arglist *def)
+{
+       char *mtu = uml_vector_fetch_arg(def, "mtu");
+       long result;
+
+       if (mtu != NULL) {
+               if (kstrtoul(mtu, 10, &result) == 0)
+                       return result;
+       }
+       return ETH_MAX_PACKET;
+}
+
+static int get_depth(struct arglist *def)
+{
+       char *mtu = uml_vector_fetch_arg(def, "depth");
+       long result;
+
+       if (mtu != NULL) {
+               if (kstrtoul(mtu, 10, &result) == 0)
+                       return result;
+       }
+       return DEFAULT_VECTOR_SIZE;
+}
+
+static int get_headroom(struct arglist *def)
+{
+       char *mtu = uml_vector_fetch_arg(def, "headroom");
+       long result;
+
+       if (mtu != NULL) {
+               if (kstrtoul(mtu, 10, &result) == 0)
+                       return result;
+       }
+       return DEFAULT_HEADROOM;
+}
+
+static int get_req_size(struct arglist *def)
+{
+       char *gro = uml_vector_fetch_arg(def, "gro");
+       long result;
+
+       if (gro != NULL) {
+               if (kstrtoul(gro, 10, &result) == 0) {
+                       if (result > 0)
+                               return 65536;
+               }
+       }
+       return get_mtu(def) + ETH_HEADER_OTHER +
+               get_headroom(def) + SAFETY_MARGIN;
+}
+
+
+/* Compute the option flags (vector RX/TX, BPF) for a transport spec. */
+static int get_transport_options(struct arglist *def)
+{
+       char *transport = uml_vector_fetch_arg(def, "transport");
+       char *vector = uml_vector_fetch_arg(def, "vec");
+
+       int vec_rx = VECTOR_RX;
+       int vec_tx = VECTOR_TX;
+       unsigned long parsed;
+
+       /* "vec=0" disables vector IO in both directions. */
+       if (vector != NULL) {
+               if (kstrtoul(vector, 10, &parsed) == 0) {
+                       if (parsed == 0) {
+                               vec_rx = 0;
+                               vec_tx = 0;
+                       }
+               }
+       }
+
+       /* uml_vector_fetch_arg() can return NULL (every other caller
+        * in this file checks for it); the old code passed that
+        * straight into strncmp() and would oops.
+        */
+       if (transport == NULL)
+               return (vec_rx | vec_tx);
+
+       if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
+               return (vec_rx | VECTOR_BPF);
+       if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
+               return (vec_rx | vec_tx);
+       return (vec_rx | vec_tx);
+}
+
+
+/* A mini-buffer for packet drop read
+ * All of our supported transports are datagram oriented and we always
+ * read using recvmsg or recvmmsg. If we pass a buffer which is smaller
+ * than the packet size it still counts as full packet read and will
+ * clean the incoming stream to keep sigio/epoll happy
+ */
+
+#define DROP_BUFFER_SIZE 32
+
+/* Scratch sink used when skb allocation fails: the read still happens
+ * (clearing the incoming stream), the payload is simply discarded.
+ */
+static char *drop_buffer;
+
+/* Array backed queues optimized for bulk enqueue/dequeue and
+ * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios.
+ * For more details and full design rationale see
+ * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt
+ */
+
+
+/*
+ * Advance the mmsg queue head by n = advance. Resets the queue to
+ * maximum enqueue/dequeue-at-once capacity if possible. Called by
+ * dequeuers. Caller must hold the head_lock!
+ */
+
+static int vector_advancehead(struct vector_queue *qi, int advance)
+{
+       int depth;
+
+       qi->head = (qi->head + advance) % qi->max_depth;
+
+       spin_lock(&qi->tail_lock);
+       qi->queue_depth -= advance;
+
+       /* Queue fully drained: pull head and tail back to zero so the
+        * next enqueue/dequeue can use maximum-size vectors again.
+        */
+       if (qi->queue_depth == 0) {
+               qi->head = 0;
+               qi->tail = 0;
+       }
+       depth = qi->queue_depth;
+       spin_unlock(&qi->tail_lock);
+       return depth;
+}
+
+/*     Advance the queue tail by n = advance.
+ *     This is called by enqueuers which should hold the
+ *     head lock already
+ */
+
+static int vector_advancetail(struct vector_queue *qi, int advance)
+{
+       int depth;
+
+       qi->tail = (qi->tail + advance) % qi->max_depth;
+
+       spin_lock(&qi->head_lock);
+       qi->queue_depth += advance;
+       depth = qi->queue_depth;
+       spin_unlock(&qi->head_lock);
+       return depth;
+}
+
+/* Build the iov array describing skb (optional overlay header, linear
+ * data, then page fragments). Returns the number of iov entries used,
+ * or -1 if the skb had to be dropped.
+ */
+static int prep_msg(struct vector_private *vp,
+       struct sk_buff *skb,
+       struct iovec *iov)
+{
+       int iov_index = 0;
+       int nr_frags, frag;
+       skb_frag_t *skb_frag;
+
+       nr_frags = skb_shinfo(skb)->nr_frags;
+       if (nr_frags > MAX_IOV_SIZE) {
+               if (skb_linearize(skb) != 0)
+                       goto drop;
+               /* Linearizing collapses the fragments into the linear
+                * area; re-read the count instead of iterating over a
+                * now-stale fragment list with the old value.
+                */
+               nr_frags = skb_shinfo(skb)->nr_frags;
+       }
+       /* iov[0] carries the overlay header if one is configured;
+        * its iov_base was pre-allocated by create_queue()/the caller.
+        */
+       if (vp->header_size > 0) {
+               iov[iov_index].iov_len = vp->header_size;
+               vp->form_header(iov[iov_index].iov_base, skb, vp);
+               iov_index++;
+       }
+       iov[iov_index].iov_base = skb->data;
+       if (nr_frags > 0) {
+               /* Linear part only; the fragments follow below. */
+               iov[iov_index].iov_len = skb->len - skb->data_len;
+               vp->estats.sg_ok++;
+       } else
+               iov[iov_index].iov_len = skb->len;
+       iov_index++;
+       for (frag = 0; frag < nr_frags; frag++) {
+               skb_frag = &skb_shinfo(skb)->frags[frag];
+               iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
+               iov[iov_index].iov_len = skb_frag_size(skb_frag);
+               iov_index++;
+       }
+       return iov_index;
+drop:
+       return -1;
+}
+/*
+ * Generic vector enqueue with support for forming headers using transport
+ * specific callback. Allows GRE, L2TPv3, RAW and other transports
+ * to use a common enqueue procedure in vector mode
+ */
+
+static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
+{
+       struct vector_private *vp = netdev_priv(qi->dev);
+       int queue_depth;
+       int packet_len;
+       struct mmsghdr *mmsg_vector = qi->mmsg_vector;
+       int iov_count;
+
+       /* tail_lock serialises enqueuers; head_lock is taken only
+        * briefly to get a consistent snapshot of queue_depth.
+        */
+       spin_lock(&qi->tail_lock);
+       spin_lock(&qi->head_lock);
+       queue_depth = qi->queue_depth;
+       spin_unlock(&qi->head_lock);
+
+       if (skb)
+               packet_len = skb->len;
+
+       if (queue_depth < qi->max_depth) {
+
+               /* Park the skb at the tail slot and build its matching
+                * mmsghdr/iov entry (header + data fragments).
+                */
+               *(qi->skbuff_vector + qi->tail) = skb;
+               mmsg_vector += qi->tail;
+               iov_count = prep_msg(
+                       vp,
+                       skb,
+                       mmsg_vector->msg_hdr.msg_iov
+               );
+               if (iov_count < 1)
+                       goto drop;
+               mmsg_vector->msg_hdr.msg_iovlen = iov_count;
+               mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr;
+               mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size;
+               queue_depth = vector_advancetail(qi, 1);
+       } else
+               goto drop;
+       spin_unlock(&qi->tail_lock);
+       return queue_depth;
+drop:
+       /* Queue full or iov preparation failed: account the drop and
+        * complete the BQL bookkeeping for this skb.
+        */
+       qi->dev->stats.tx_dropped++;
+       if (skb != NULL) {
+               packet_len = skb->len;
+               dev_consume_skb_any(skb);
+               netdev_completed_queue(qi->dev, 1, packet_len);
+       }
+       spin_unlock(&qi->tail_lock);
+       return queue_depth;
+}
+
+/* Complete 'count' transmitted skbs starting at the queue head:
+ * free them, update stats/BQL, then advance the head.
+ */
+static int consume_vector_skbs(struct vector_queue *qi, int count)
+{
+       struct sk_buff *skb;
+       int i;
+       int bytes_compl = 0;
+
+       for (i = 0; i < count; i++) {
+               skb = qi->skbuff_vector[qi->head + i];
+               bytes_compl += skb->len;
+               /* NULL the slot so destroy_queue() won't free it again */
+               qi->skbuff_vector[qi->head + i] = NULL;
+               dev_consume_skb_any(skb);
+       }
+       qi->dev->stats.tx_bytes += bytes_compl;
+       qi->dev->stats.tx_packets += count;
+       netdev_completed_queue(qi->dev, count, bytes_compl);
+       return vector_advancehead(qi, count);
+}
+
+/*
+ * Generic vector deque via sendmmsg with support for forming headers
+ * using transport specific callback. Allows GRE, L2TPv3, RAW and
+ * other transports to use a common dequeue procedure in vector mode
+ */
+
+
+static int vector_send(struct vector_queue *qi)
+{
+       struct vector_private *vp = netdev_priv(qi->dev);
+       struct mmsghdr *send_from;
+       int result = 0, send_len, queue_depth = qi->max_depth;
+
+       /* Single dequeuer at a time: if the head lock is contended,
+        * another context is already flushing - just reschedule.
+        */
+       if (spin_trylock(&qi->head_lock)) {
+               if (spin_trylock(&qi->tail_lock)) {
+                       /* update queue_depth to current value */
+                       queue_depth = qi->queue_depth;
+                       spin_unlock(&qi->tail_lock);
+                       while (queue_depth > 0) {
+                               /* Calculate the start of the vector */
+                               send_len = queue_depth;
+                               send_from = qi->mmsg_vector;
+                               send_from += qi->head;
+                               /* Adjust vector size if wraparound */
+                               if (send_len + qi->head > qi->max_depth)
+                                       send_len = qi->max_depth - qi->head;
+                               /* Try to TX as many packets as possible */
+                               if (send_len > 0) {
+                                       result = uml_vector_sendmmsg(
+                                                vp->fds->tx_fd,
+                                                send_from,
+                                                send_len,
+                                                0
+                                       );
+                                       vp->in_write_poll =
+                                               (result != send_len);
+                               }
+                               /* For some of the sendmmsg error scenarios
+                                * we may end being unsure in the TX success
+                                * for all packets. It is safer to declare
+                                * them all TX-ed and blame the network.
+                                */
+                               if (result < 0) {
+                                       if (net_ratelimit())
+                                               netdev_err(vp->dev, "sendmmsg err=%i\n",
+                                                       result);
+                                       result = send_len;
+                               }
+                               if (result > 0) {
+                                       queue_depth =
+                                               consume_vector_skbs(qi, result);
+                                       /* This is equivalent to an TX IRQ.
+                                        * Restart the upper layers to feed us
+                                        * more packets.
+                                        */
+                                       if (result > vp->estats.tx_queue_max)
+                                               vp->estats.tx_queue_max = result;
+                                       vp->estats.tx_queue_running_average =
+                                               (vp->estats.tx_queue_running_average + result) >> 1;
+                               }
+                               netif_trans_update(qi->dev);
+                               netif_wake_queue(qi->dev);
+                               /* if TX is busy, break out of the send loop,
+                                *  poll write IRQ will reschedule xmit for us
+                                */
+                               if (result != send_len) {
+                                       vp->estats.tx_restart_queue++;
+                                       break;
+                               }
+                       }
+               }
+               spin_unlock(&qi->head_lock);
+       } else {
+               tasklet_schedule(&vp->tx_poll);
+       }
+       return queue_depth;
+}
+
+/* Queue destructor. Deliberately stateless so we can use
+ * it in queue cleanup if initialization fails.
+ */
+
+static void destroy_queue(struct vector_queue *qi)
+{
+       int i;
+       struct iovec *iov;
+       struct vector_private *vp;
+       struct mmsghdr *mmsg_vector;
+
+       /* Check before touching qi: create_queue()'s error path may
+        * hand us a NULL queue, and the old code dereferenced qi->dev
+        * for netdev_priv() before this guard.
+        */
+       if (qi == NULL)
+               return;
+       vp = netdev_priv(qi->dev);
+       /* deallocate any skbuffs - we rely on any unused to be
+        * set to NULL.
+        */
+       if (qi->skbuff_vector != NULL) {
+               for (i = 0; i < qi->max_depth; i++) {
+                       if (*(qi->skbuff_vector + i) != NULL)
+                               dev_kfree_skb_any(*(qi->skbuff_vector + i));
+               }
+               kfree(qi->skbuff_vector);
+       }
+       /* deallocate matching IOV structures including header buffs */
+       if (qi->mmsg_vector != NULL) {
+               mmsg_vector = qi->mmsg_vector;
+               for (i = 0; i < qi->max_depth; i++) {
+                       iov = mmsg_vector->msg_hdr.msg_iov;
+                       if (iov != NULL) {
+                               /* iov[0].iov_base is a kmalloc'ed header
+                                * buffer only when a header is in use;
+                                * otherwise it aliases skb data and must
+                                * not be freed. kfree(NULL) is a no-op,
+                                * so no extra NULL check is needed.
+                                */
+                               if (vp->header_size > 0)
+                                       kfree(iov->iov_base);
+                               kfree(iov);
+                       }
+                       mmsg_vector++;
+               }
+               kfree(qi->mmsg_vector);
+       }
+       kfree(qi);
+}
+
+/*
+ * Queue constructor. Create a queue with a given side.
+ */
+/*
+ * Queue constructor. Create a queue with a given side.
+ */
+static struct vector_queue *create_queue(
+       struct vector_private *vp,
+       int max_size,
+       int header_size,
+       int num_extra_frags)
+{
+       struct vector_queue *result;
+       int i;
+       struct iovec *iov;
+       struct mmsghdr *mmsg_vector;
+
+       result = kmalloc(sizeof(struct vector_queue), GFP_KERNEL);
+       if (result == NULL)
+               return NULL;
+       result->max_depth = max_size;
+       result->dev = vp->dev;
+       /* kcalloc so both vectors start zeroed: destroy_queue() frees
+        * every non-NULL entry, and with plain kmalloc a failure here
+        * would have handed it uninitialised garbage pointers.
+        */
+       result->mmsg_vector = kcalloc(max_size,
+               sizeof(struct mmsghdr), GFP_KERNEL);
+       result->skbuff_vector = kcalloc(max_size,
+               sizeof(void *), GFP_KERNEL);
+       if (result->mmsg_vector == NULL || result->skbuff_vector == NULL)
+               goto out_fail;
+
+       mmsg_vector = result->mmsg_vector;
+       result->max_iov_frags = num_extra_frags;
+       for (i = 0; i < max_size; i++) {
+               /* One iov for the overlay header (when configured), one
+                * for the linear skb data, num_extra_frags for page
+                * fragments, plus a spare terminator slot.
+                */
+               if (vp->header_size > 0)
+                       iov = kmalloc(
+                               sizeof(struct iovec) * (3 + num_extra_frags),
+                               GFP_KERNEL
+                       );
+               else
+                       iov = kmalloc(
+                               sizeof(struct iovec) * (2 + num_extra_frags),
+                               GFP_KERNEL
+                       );
+               if (iov == NULL)
+                       goto out_fail;
+               mmsg_vector->msg_hdr.msg_iov = iov;
+               mmsg_vector->msg_hdr.msg_iovlen = 1;
+               mmsg_vector->msg_hdr.msg_control = NULL;
+               mmsg_vector->msg_hdr.msg_controllen = 0;
+               mmsg_vector->msg_hdr.msg_flags = MSG_DONTWAIT;
+               mmsg_vector->msg_hdr.msg_name = NULL;
+               mmsg_vector->msg_hdr.msg_namelen = 0;
+               if (vp->header_size > 0) {
+                       iov->iov_base = kmalloc(header_size, GFP_KERNEL);
+                       if (iov->iov_base == NULL)
+                               goto out_fail;
+                       iov->iov_len = header_size;
+                       mmsg_vector->msg_hdr.msg_iovlen = 2;
+                       iov++;
+               }
+               iov->iov_base = NULL;
+               iov->iov_len = 0;
+               mmsg_vector++;
+       }
+       spin_lock_init(&result->head_lock);
+       spin_lock_init(&result->tail_lock);
+       result->queue_depth = 0;
+       result->head = 0;
+       result->tail = 0;
+       return result;
+out_fail:
+       destroy_queue(result);
+       return NULL;
+}
+
+/*
+ * We do not use the RX queue as a proper wraparound queue for now
+ * This is not necessary because the consumption via netif_rx()
+ * happens in-line. While we can try using the return code of
+ * netif_rx() for flow control there are no drivers doing this today.
+ * For this RX specific use we ignore the tail/head locks and
+ * just read into a prepared queue filled with skbuffs.
+ */
+
+static struct sk_buff *prep_skb(
+       struct vector_private *vp,
+       struct user_msghdr *msg)
+{
+       int linear = vp->max_packet + vp->headroom + SAFETY_MARGIN;
+       struct sk_buff *result;
+       int iov_index = 0, len;
+       struct iovec *iov = msg->msg_iov;
+       int err, nr_frags, frag;
+       skb_frag_t *skb_frag;
+
+       /* Request at least the linear part; anything beyond max_packet
+        * is allocated as page fragments.
+        */
+       if (vp->req_size <= linear)
+               len = linear;
+       else
+               len = vp->req_size;
+       result = alloc_skb_with_frags(
+               linear,
+               len - vp->max_packet,
+               3,
+               &err,
+               GFP_ATOMIC
+       );
+       /* iov[0] is reserved for the overlay header; data iovs start
+        * at index 1 when a header is in use.
+        */
+       if (vp->header_size > 0)
+               iov_index++;
+       if (result == NULL) {
+               /* Allocation failed: present a NULL zero-length data
+                * iov so recvm(m)sg still consumes (and discards) the
+                * incoming datagram.
+                */
+               iov[iov_index].iov_base = NULL;
+               iov[iov_index].iov_len = 0;
+               goto done;
+       }
+       skb_reserve(result, vp->headroom);
+       result->dev = vp->dev;
+       skb_put(result, vp->max_packet);
+       result->data_len = len - vp->max_packet;
+       result->len += len - vp->max_packet;
+       skb_reset_mac_header(result);
+       result->ip_summed = CHECKSUM_NONE;
+       iov[iov_index].iov_base = result->data;
+       iov[iov_index].iov_len = vp->max_packet;
+       iov_index++;
+
+       /* Map each page fragment into its own iov entry. */
+       nr_frags = skb_shinfo(result)->nr_frags;
+       for (frag = 0; frag < nr_frags; frag++) {
+               skb_frag = &skb_shinfo(result)->frags[frag];
+               iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
+               if (iov[iov_index].iov_base != NULL)
+                       iov[iov_index].iov_len = skb_frag_size(skb_frag);
+               else
+                       iov[iov_index].iov_len = 0;
+               iov_index++;
+       }
+done:
+       msg->msg_iovlen = iov_index;
+       return result;
+}
+
+
+/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/
+
+static void prep_queue_for_rx(struct vector_queue *qi)
+{
+       struct vector_private *vp = netdev_priv(qi->dev);
+       struct mmsghdr *mmsg_vector = qi->mmsg_vector;
+       void **skbuff_vector = qi->skbuff_vector;
+       int i;
+
+       if (qi->queue_depth == 0)
+               return;
+
+       /* A failed prep_skb() is acceptable: recvmmsg with a NULL data
+        * iov still consumes the datagram, it just drops the payload -
+        * no separate drop buffer needed here.
+        */
+       for (i = 0; i < qi->queue_depth; i++, skbuff_vector++, mmsg_vector++)
+               *skbuff_vector = prep_skb(vp, &mmsg_vector->msg_hdr);
+
+       qi->queue_depth = 0;
+}
+
+/* Look up the vector device with unit number n; NULL if absent.
+ * Uses list_for_each_entry instead of the open-coded
+ * list_for_each + list_entry pair.
+ */
+static struct vector_device *find_device(int n)
+{
+       struct vector_device *device;
+
+       spin_lock(&vector_devices_lock);
+       list_for_each_entry(device, &vector_devices, list) {
+               if (device->unit == n)
+                       goto out;
+       }
+       device = NULL;
+ out:
+       spin_unlock(&vector_devices_lock);
+       return device;
+}
+
+/* Split "N:rest" into device number and parameter string.
+ * Returns 0 on success with *index_out/*str_out filled in,
+ * negative errno with *error_out set otherwise.
+ */
+static int vector_parse(char *str, int *index_out, char **str_out,
+                       char **error_out)
+{
+       unsigned int n;
+       int err;
+       char *start = str;
+
+       /* Find the ':' separator; stop before the final character so
+        * something must follow it. (The old unused strlen-into-len
+        * assignment is gone.)
+        */
+       while ((*str != ':') && (strlen(str) > 1))
+               str++;
+       if (*str != ':') {
+               *error_out = "Expected ':' after device number";
+               return -EINVAL;
+       }
+       *str = '\0';
+
+       /* kstrtouint() takes an unsigned int *; the old "int n"
+        * relied on an incompatible pointer conversion.
+        */
+       err = kstrtouint(start, 0, &n);
+       if (err < 0) {
+               *error_out = "Bad device number";
+               return err;
+       }
+
+       str++;
+       if (find_device(n)) {
+               *error_out = "Device already configured";
+               return -EINVAL;
+       }
+
+       *index_out = n;
+       *str_out = str;
+       return 0;
+}
+
+/* Handle a "vecN:..." configuration string: parse it and create the
+ * corresponding interface.
+ */
+static int vector_config(char *str, char **error_out)
+{
+       int err, n;
+       char *params;
+       struct arglist *parsed;
+
+       err = vector_parse(str, &n, &params, error_out);
+       if (err != 0)
+               return err;
+
+       /* This string is broken up and the pieces used by the underlying
+        * driver. We should copy it to make sure things do not go wrong
+        * later.
+        */
+
+       params = kstrdup(params, GFP_KERNEL);
+       if (params == NULL) {
+               *error_out = "vector_config failed to strdup string";
+               return -ENOMEM;
+       }
+
+       parsed = uml_parse_vector_ifspec(params);
+
+       if (parsed == NULL) {
+               *error_out = "vector_config failed to parse parameters";
+               /* nothing retained a reference - don't leak the copy */
+               kfree(params);
+               return -EINVAL;
+       }
+
+       vector_eth_configure(n, parsed);
+       return 0;
+}
+
+/* Parse a bare device number for the mconsole id handler; both range
+ * outputs get the same value. Returns the number, or -1 on bad input.
+ */
+static int vector_id(char **str, int *start_out, int *end_out)
+{
+       char *end;
+       int n = simple_strtoul(*str, &end, 0);
+
+       if (end == *str || *end != '\0')
+               return -1;
+
+       *start_out = n;
+       *end_out = n;
+       *str = end;
+       return n;
+}
+
+/* Tear down device n unless it is still open (fds allocated). */
+static int vector_remove(int n, char **error_out)
+{
+       struct vector_device *vec_d = find_device(n);
+       struct vector_private *vp;
+
+       if (!vec_d)
+               return -ENODEV;
+       vp = netdev_priv(vec_d->dev);
+       if (vp->fds)
+               return -EBUSY;
+       unregister_netdev(vec_d->dev);
+       platform_device_unregister(&vec_d->pdev);
+       return 0;
+}
+
+/*
+ * There is no shared per-transport initialization code, so
+ * we will just initialize each interface one by one and
+ * add them to a list
+ */
+
+/* Platform driver shell: only supplies the driver name; per-device
+ * state lives in struct vector_device (see vector_device_release).
+ */
+static struct platform_driver uml_net_driver = {
+       .driver = {
+               .name = DRIVER_NAME,
+       },
+};
+
+
+/* Device-model release callback: unlink the vector device from the
+ * global list and free both it and its net_device.
+ */
+static void vector_device_release(struct device *dev)
+{
+       struct vector_device *vdev = dev_get_drvdata(dev);
+       struct net_device *netdev = vdev->dev;
+
+       list_del(&vdev->list);
+       kfree(vdev);
+       free_netdev(netdev);
+}
+
+/* Bog standard recv using recvmsg - not used normally unless the user
+ * explicitly specifies not to use recvmmsg vector RX.
+ */
+
+static int vector_legacy_rx(struct vector_private *vp)
+{
+       int pkt_len;
+       struct user_msghdr hdr;
+       struct iovec iov[2 + MAX_IOV_SIZE]; /* header + data use case only */
+       int iovpos = 0;
+       struct sk_buff *skb;
+       int header_check;
+
+       hdr.msg_name = NULL;
+       hdr.msg_namelen = 0;
+       hdr.msg_iov = (struct iovec *) &iov;
+       hdr.msg_control = NULL;
+       hdr.msg_controllen = 0;
+       hdr.msg_flags = 0;
+
+       /* iov[0] receives the overlay header; prep_skb() fills data
+        * iovs starting at index 1 in that case.
+        */
+       if (vp->header_size > 0) {
+               iov[0].iov_base = vp->header_rxbuffer;
+               iov[0].iov_len = vp->header_size;
+       }
+
+       skb = prep_skb(vp, &hdr);
+
+       if (skb == NULL) {
+               /* Read a packet into drop_buffer and don't do
+                * anything with it.
+                */
+               iov[iovpos].iov_base = drop_buffer;
+               iov[iovpos].iov_len = DROP_BUFFER_SIZE;
+               hdr.msg_iovlen = 1;
+               vp->dev->stats.rx_dropped++;
+       }
+
+       pkt_len = uml_vector_recvmsg(vp->fds->rx_fd, &hdr, 0);
+
+       if (skb != NULL) {
+               if (pkt_len > vp->header_size) {
+                       if (vp->header_size > 0) {
+                               /* Transport-specific verification: < 0
+                                * means drop, > 0 means the checksum was
+                                * already validated by the transport.
+                                */
+                               header_check = vp->verify_header(
+                                       vp->header_rxbuffer, skb, vp);
+                               if (header_check < 0) {
+                                       dev_kfree_skb_irq(skb);
+                                       vp->dev->stats.rx_dropped++;
+                                       vp->estats.rx_encaps_errors++;
+                                       return 0;
+                               }
+                               if (header_check > 0) {
+                                       vp->estats.rx_csum_offload_good++;
+                                       skb->ip_summed = CHECKSUM_UNNECESSARY;
+                               }
+                       }
+                       pskb_trim(skb, pkt_len - vp->rx_header_size);
+                       skb->protocol = eth_type_trans(skb, skb->dev);
+                       vp->dev->stats.rx_bytes += skb->len;
+                       vp->dev->stats.rx_packets++;
+                       netif_rx(skb);
+               } else {
+                       /* Header-only (or shorter) read - discard. */
+                       dev_kfree_skb_irq(skb);
+               }
+       }
+       return pkt_len;
+}
+
+/*
+ * Packet at a time TX which falls back to vector TX if the
+ * underlying transport is busy.
+ */
+
+
+
+/* Single-packet TX via writev; used when vector TX is disabled. */
+static int writev_tx(struct vector_private *vp, struct sk_buff *skb)
+{
+       struct iovec iov[3 + MAX_IOV_SIZE];
+       int iov_count;
+       int pkt_len = 0;
+
+       iov[0].iov_base = vp->header_txbuffer;
+       iov_count = prep_msg(vp, skb, iov);
+       if (iov_count < 1) {
+               /* iov preparation failed - drop the packet */
+               vp->dev->stats.tx_dropped++;
+               consume_skb(skb);
+               return 0;
+       }
+
+       pkt_len = uml_vector_writev(vp->fds->tx_fd, iov, iov_count);
+
+       netif_trans_update(vp->dev);
+       netif_wake_queue(vp->dev);
+
+       if (pkt_len > 0) {
+               vp->dev->stats.tx_bytes += skb->len;
+               vp->dev->stats.tx_packets++;
+       } else {
+               vp->dev->stats.tx_dropped++;
+       }
+       consume_skb(skb);
+       return pkt_len;
+}
+
+/*
+ * Receive as many messages as we can in one call using the special
+ * mmsg vector matched to an skb vector which we prepared earlier.
+ */
+
+static int vector_mmsg_rx(struct vector_private *vp)
+{
+       int packet_count, i;
+       struct vector_queue *qi = vp->rx_queue;
+       struct sk_buff *skb;
+       struct mmsghdr *mmsg_vector = qi->mmsg_vector;
+       void **skbuff_vector = qi->skbuff_vector;
+       int header_check;
+
+       /* Refresh the vector and make sure it is with new skbs and the
+        * iovs are updated to point to them.
+        */
+
+       prep_queue_for_rx(qi);
+
+       /* Fire the Lazy Gun - get as many packets as we can in one go. */
+
+       packet_count = uml_vector_recvmmsg(
+               vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0);
+
+       if (packet_count <= 0)
+               return packet_count;
+
+       /* We treat packet processing as enqueue, buffer refresh as dequeue
+        * The queue_depth tells us how many buffers have been used and how
+        * many do we need to prep the next time prep_queue_for_rx() is called.
+        */
+
+       qi->queue_depth = packet_count;
+
+       for (i = 0; i < packet_count; i++) {
+               skb = (*skbuff_vector);
+               if (mmsg_vector->msg_len > vp->header_size) {
+                       if (vp->header_size > 0) {
+                               /* iov[0] holds the overlay header read
+                                * alongside the payload; hand it to the
+                                * transport for verification.
+                                */
+                               header_check = vp->verify_header(
+                                       mmsg_vector->msg_hdr.msg_iov->iov_base,
+                                       skb,
+                                       vp
+                               );
+                               if (header_check < 0) {
+                               /* Overlay header failed to verify - discard.
+                                * We can actually keep this skb and reuse it,
+                                * but that will make the prep logic too
+                                * complex.
+                                */
+                                       dev_kfree_skb_irq(skb);
+                                       vp->estats.rx_encaps_errors++;
+                                       continue;
+                               }
+                               if (header_check > 0) {
+                                       vp->estats.rx_csum_offload_good++;
+                                       skb->ip_summed = CHECKSUM_UNNECESSARY;
+                               }
+                       }
+                       pskb_trim(skb,
+                               mmsg_vector->msg_len - vp->rx_header_size);
+                       skb->protocol = eth_type_trans(skb, skb->dev);
+                       /*
+                        * We do not need to lock on updating stats here
+                        * The interrupt loop is non-reentrant.
+                        */
+                       vp->dev->stats.rx_bytes += skb->len;
+                       vp->dev->stats.rx_packets++;
+                       netif_rx(skb);
+               } else {
+                       /* Overlay header too short to do anything - discard.
+                        * We can actually keep this skb and reuse it,
+                        * but that will make the prep logic too complex.
+                        */
+                       if (skb != NULL)
+                               dev_kfree_skb_irq(skb);
+               }
+               /* Slot consumed; NULL it so destroy_queue() skips it. */
+               (*skbuff_vector) = NULL;
+               /* Move to the next buffer element */
+               mmsg_vector++;
+               skbuff_vector++;
+       }
+       if (packet_count > 0) {
+               if (vp->estats.rx_queue_max < packet_count)
+                       vp->estats.rx_queue_max = packet_count;
+               vp->estats.rx_queue_running_average =
+                       (vp->estats.rx_queue_running_average + packet_count) >> 1;
+       }
+       return packet_count;
+}
+
+/* Drain the RX side: keep reading (vector or legacy mode) until the
+ * transport has nothing left, then rate-limit-log any error.
+ */
+static void vector_rx(struct vector_private *vp)
+{
+       int err;
+
+       if ((vp->options & VECTOR_RX) > 0) {
+               do {
+                       err = vector_mmsg_rx(vp);
+               } while (err > 0);
+       } else {
+               do {
+                       err = vector_legacy_rx(vp);
+               } while (err > 0);
+       }
+       if ((err != 0) && net_ratelimit())
+               netdev_err(vp->dev, "vector_rx: error(%d)\n", err);
+}
+
+static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct vector_private *vp = netdev_priv(dev);
+       int queue_depth = 0;
+
+       /* Packet-at-a-time mode: bypass the TX queue entirely. */
+       if ((vp->options & VECTOR_TX) == 0) {
+               writev_tx(vp, skb);
+               return NETDEV_TX_OK;
+       }
+
+       /* We do BQL only in the vector path, no point doing it in
+        * packet at a time mode as there is no device queue
+        */
+
+       netdev_sent_queue(vp->dev, skb->len);
+       queue_depth = vector_enqueue(vp->tx_queue, skb);
+
+       /* if the device queue is full, stop the upper layers and
+        * flush it.
+        */
+
+       if (queue_depth >= vp->tx_queue->max_depth - 1) {
+               vp->estats.tx_kicks++;
+               netif_stop_queue(dev);
+               vector_send(vp->tx_queue);
+               return NETDEV_TX_OK;
+       }
+       /* More packets are coming: arm the coalescing timer and batch. */
+       if (skb->xmit_more) {
+               mod_timer(&vp->tl, vp->coalesce);
+               return NETDEV_TX_OK;
+       }
+       /* Flush small packets immediately (presumably latency
+        * sensitive); larger ones wait for the TX tasklet.
+        */
+       if (skb->len < TX_SMALL_PACKET) {
+               vp->estats.tx_kicks++;
+               vector_send(vp->tx_queue);
+       } else
+               tasklet_schedule(&vp->tx_poll);
+       return NETDEV_TX_OK;
+}
+
+/* RX IRQ: drain the receive path if the interface is up. */
+static irqreturn_t vector_rx_interrupt(int irq, void *dev_id)
+{
+       struct net_device *dev = dev_id;
+
+       if (netif_running(dev)) {
+               vector_rx(netdev_priv(dev));
+               return IRQ_HANDLED;
+       }
+       return IRQ_NONE;
+}
+
+/* Write-poll IRQ for the TX side. */
+static irqreturn_t vector_tx_interrupt(int irq, void *dev_id)
+{
+       struct net_device *dev = dev_id;
+       struct vector_private *vp;
+
+       if (!netif_running(dev))
+               return IRQ_NONE;
+       vp = netdev_priv(dev);
+       /* Only interesting after sendmmsg came back short (-EAGAIN or
+        * -ENOBUFS); otherwise it is ignored. In the future, it may be
+        * worth improving the IRQ controller so tweaking the IRQ mask
+        * is less costly than this.
+        */
+       if (vp->in_write_poll)
+               tasklet_schedule(&vp->tx_poll);
+       return IRQ_HANDLED;
+}
+
+/* NOTE(review): presumably round-robins IRQ numbers across devices
+ * (its use is not visible in this hunk) - confirm against the open
+ * path before relying on this.
+ */
+static int irq_rr;
+
+/* ndo_stop: quiesce the queue and timer, then release IRQs, file
+ * descriptors, buffers and queues. Safe to call when already closed
+ * (vp->fds == NULL).
+ */
+static int vector_net_close(struct net_device *dev)
+{
+       struct vector_private *vp = netdev_priv(dev);
+       unsigned long flags;
+
+       netif_stop_queue(dev);
+       del_timer(&vp->tl);
+
+       if (vp->fds == NULL)
+               return 0;
+
+       /* Disable and free all IRQS */
+       if (vp->rx_irq > 0) {
+               um_free_irq(vp->rx_irq, dev);
+               vp->rx_irq = 0;
+       }
+       if (vp->tx_irq > 0) {
+               um_free_irq(vp->tx_irq, dev);
+               vp->tx_irq = 0;
+       }
+       tasklet_kill(&vp->tx_poll);
+       if (vp->fds->rx_fd > 0) {
+               os_close_file(vp->fds->rx_fd);
+               vp->fds->rx_fd = -1;
+       }
+       if (vp->fds->tx_fd > 0) {
+               os_close_file(vp->fds->tx_fd);
+               vp->fds->tx_fd = -1;
+       }
+       /* kfree(NULL) is a no-op, so the former per-pointer NULL
+        * guards were redundant.
+        */
+       kfree(vp->bpf);
+       kfree(vp->fds->remote_addr);
+       kfree(vp->transport_data);
+       kfree(vp->header_rxbuffer);
+       kfree(vp->header_txbuffer);
+       if (vp->rx_queue != NULL)
+               destroy_queue(vp->rx_queue);
+       if (vp->tx_queue != NULL)
+               destroy_queue(vp->tx_queue);
+       kfree(vp->fds);
+       vp->fds = NULL;
+       spin_lock_irqsave(&vp->lock, flags);
+       vp->opened = false;
+       spin_unlock_irqrestore(&vp->lock, flags);
+       return 0;
+}
+
+/* TX tasklet */
+
+/* Tasklet body: kick vector_send() on the TX queue from softirq context. */
+static void vector_tx_poll(unsigned long data)
+{
+       struct vector_private *vp = (struct vector_private *)data;
+
+       vp->estats.tx_kicks++;
+       vector_send(vp->tx_queue);
+}
+/* Work item scheduled from the TX watchdog: restart the stopped queue. */
+static void vector_reset_tx(struct work_struct *work)
+{
+       struct vector_private *vp =
+               container_of(work, struct vector_private, reset_tx);
+       netdev_reset_queue(vp->dev);
+       netif_start_queue(vp->dev);
+       netif_wake_queue(vp->dev);
+}
+/* ndo_open: allocate host fds, queues/buffers and IRQs. Every failure
+ * funnels through vector_net_close(), which unwinds partial state.
+ */
+static int vector_net_open(struct net_device *dev)
+{
+       struct vector_private *vp = netdev_priv(dev);
+       unsigned long flags;
+       int err = -EINVAL;
+       struct vector_device *vdevice;
+
+       spin_lock_irqsave(&vp->lock, flags);
+       if (vp->opened) {
+               spin_unlock_irqrestore(&vp->lock, flags);
+               return -ENXIO;
+       }
+       vp->opened = true;
+       spin_unlock_irqrestore(&vp->lock, flags);
+
+       vp->fds = uml_vector_user_open(vp->unit, vp->parsed);
+
+       if (vp->fds == NULL)
+               goto out_close;
+
+       if (build_transport_data(vp) < 0)
+               goto out_close;
+
+       if ((vp->options & VECTOR_RX) > 0) {
+               vp->rx_queue = create_queue(
+                       vp,
+                       get_depth(vp->parsed),
+                       vp->rx_header_size,
+                       MAX_IOV_SIZE
+               );
+               /* The original dereferenced the queue unconditionally and
+                * would oops here if the allocation failed.
+                */
+               if (vp->rx_queue == NULL) {
+                       err = -ENOMEM;
+                       goto out_close;
+               }
+               vp->rx_queue->queue_depth = get_depth(vp->parsed);
+       } else {
+               vp->header_rxbuffer = kmalloc(
+                       vp->rx_header_size,
+                       GFP_KERNEL
+               );
+               if (vp->header_rxbuffer == NULL) {
+                       err = -ENOMEM;
+                       goto out_close;
+               }
+       }
+       if ((vp->options & VECTOR_TX) > 0) {
+               vp->tx_queue = create_queue(
+                       vp,
+                       get_depth(vp->parsed),
+                       vp->header_size,
+                       MAX_IOV_SIZE
+               );
+               if (vp->tx_queue == NULL) {
+                       err = -ENOMEM;
+                       goto out_close;
+               }
+       } else {
+               vp->header_txbuffer = kmalloc(vp->header_size, GFP_KERNEL);
+               if (vp->header_txbuffer == NULL) {
+                       err = -ENOMEM;
+                       goto out_close;
+               }
+       }
+
+       /* READ IRQ */
+       err = um_request_irq(
+               irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd,
+                       IRQ_READ, vector_rx_interrupt,
+                       IRQF_SHARED, dev->name, dev);
+       if (err != 0) {
+               netdev_err(dev, "vector_open: failed to get rx irq(%d)\n", err);
+               err = -ENETUNREACH;
+               goto out_close;
+       }
+       vp->rx_irq = irq_rr + VECTOR_BASE_IRQ;
+       dev->irq = irq_rr + VECTOR_BASE_IRQ;
+       irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
+
+       /* WRITE IRQ - we need it only if we have vector TX */
+       if ((vp->options & VECTOR_TX) > 0) {
+               err = um_request_irq(
+                       irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd,
+                               IRQ_WRITE, vector_tx_interrupt,
+                               IRQF_SHARED, dev->name, dev);
+               if (err != 0) {
+                       netdev_err(dev,
+                               "vector_open: failed to get tx irq(%d)\n", err);
+                       err = -ENETUNREACH;
+                       goto out_close;
+               }
+               vp->tx_irq = irq_rr + VECTOR_BASE_IRQ;
+               irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
+       }
+
+       if ((vp->options & VECTOR_QDISC_BYPASS) != 0) {
+               if (!uml_raw_enable_qdisc_bypass(vp->fds->rx_fd))
+                       vp->options = vp->options | VECTOR_BPF;
+       }
+
+       if ((vp->options & VECTOR_BPF) != 0)
+               vp->bpf = uml_vector_default_bpf(vp->fds->rx_fd, dev->dev_addr);
+
+       netif_start_queue(dev);
+
+       /* clear buffer - it can happen that the host side of the interface
+        * is full when we get here. In this case, new data is never queued,
+        * SIGIOs never arrive, and the net never works.
+        */
+
+       vector_rx(vp);
+
+       vector_reset_stats(vp);
+       vdevice = find_device(vp->unit);
+       vdevice->opened = 1;
+
+       if ((vp->options & VECTOR_TX) != 0)
+               add_timer(&vp->tl);
+       return 0;
+out_close:
+       vector_net_close(dev);
+       return err;
+}
+
+
+/* ndo_set_rx_mode: no host-side filtering is implemented yet. */
+static void vector_net_set_multicast_list(struct net_device *dev)
+{
+       /* TODO: - we can do some BPF games here */
+}
+
+/* ndo_tx_timeout: count it and defer the queue reset to process context. */
+static void vector_net_tx_timeout(struct net_device *dev)
+{
+       struct vector_private *vp = netdev_priv(dev);
+
+       vp->estats.tx_timeout_count++;
+       netif_trans_update(dev);
+       schedule_work(&vp->reset_tx);
+}
+
+/* ndo_fix_features: IP checksum offload is never supported - mask it out. */
+static netdev_features_t vector_fix_features(struct net_device *dev,
+       netdev_features_t features)
+{
+       features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
+       return features;
+}
+
+/* ndo_set_features: resize future RX frame buffers when GRO is toggled. */
+static int vector_set_features(struct net_device *dev,
+       netdev_features_t features)
+{
+       struct vector_private *vp = netdev_priv(dev);
+       /* Adjust buffer sizes for GSO/GRO. Unfortunately, there is
+        * no way to negotiate it on raw sockets, so we can change
+        * only our side.
+        */
+       if (features & NETIF_F_GRO)
+               /* All new frame buffers will be GRO-sized */
+               vp->req_size = 65536;
+       else
+               /* All new frame buffers will be normal sized */
+               vp->req_size = vp->max_packet + vp->headroom + SAFETY_MARGIN;
+       return 0;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/* Netpoll hook: poll RX once with the device IRQ disabled. */
+static void vector_net_poll_controller(struct net_device *dev)
+{
+       disable_irq(dev->irq);
+       vector_rx_interrupt(dev->irq, dev);
+       enable_irq(dev->irq);
+}
+#endif
+
+/* ethtool get_drvinfo: static driver name/version strings. */
+static void vector_net_get_drvinfo(struct net_device *dev,
+                               struct ethtool_drvinfo *info)
+{
+       strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver));
+       strlcpy(info->version, DRIVER_VERSION, sizeof(info->version));
+}
+
+/* ethtool get_ringparam: report the queue depths. The queues exist only
+ * when the device was opened with vector RX/TX enabled, so guard against
+ * NULL - the original dereferenced both unconditionally and could oops
+ * when ethtool was run against a non-vector or never-opened device.
+ */
+static void vector_get_ringparam(struct net_device *netdev,
+                               struct ethtool_ringparam *ring)
+{
+       struct vector_private *vp = netdev_priv(netdev);
+
+       if (vp->rx_queue != NULL) {
+               ring->rx_max_pending = vp->rx_queue->max_depth;
+               ring->rx_pending = vp->rx_queue->max_depth;
+       }
+       if (vp->tx_queue != NULL) {
+               ring->tx_max_pending = vp->tx_queue->max_depth;
+               ring->tx_pending = vp->tx_queue->max_depth;
+       }
+}
+
+/* ethtool string sets. NOTE(review): ethtool_stats_keys is defined
+ * elsewhere in this file; its order presumably mirrors struct
+ * vector_estats (see vector_get_ethtool_stats) - confirm when editing.
+ */
+static void vector_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
+{
+       switch (stringset) {
+       case ETH_SS_TEST:
+               *buf = '\0';
+               break;
+       case ETH_SS_STATS:
+               memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
+               break;
+       default:
+               WARN_ON(1);
+               break;
+       }
+}
+
+/* ethtool set sizes: no self-test, VECTOR_NUM_STATS ethtool stats. */
+static int vector_get_sset_count(struct net_device *dev, int sset)
+{
+       switch (sset) {
+       case ETH_SS_TEST:
+               return 0;
+       case ETH_SS_STATS:
+               return VECTOR_NUM_STATS;
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+/* ethtool stats: raw copy - relies on struct vector_estats being a flat
+ * sequence of uint64_t counters (see vector_kern.h).
+ */
+static void vector_get_ethtool_stats(struct net_device *dev,
+       struct ethtool_stats *estats,
+       u64 *tmp_stats)
+{
+       struct vector_private *vp = netdev_priv(dev);
+
+       memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats));
+}
+
+/* ethtool get_coalesce: TX coalesce timer, converted jiffies -> usecs. */
+static int vector_get_coalesce(struct net_device *netdev,
+                                       struct ethtool_coalesce *ec)
+{
+       struct vector_private *vp = netdev_priv(netdev);
+
+       ec->tx_coalesce_usecs = (vp->coalesce * 1000000) / HZ;
+       return 0;
+}
+
+/* ethtool set_coalesce: usecs -> jiffies, clamped to a minimum of one. */
+static int vector_set_coalesce(struct net_device *netdev,
+                                       struct ethtool_coalesce *ec)
+{
+       struct vector_private *vp = netdev_priv(netdev);
+
+       vp->coalesce = (ec->tx_coalesce_usecs * HZ) / 1000000;
+       if (vp->coalesce == 0)
+               vp->coalesce = 1;
+       return 0;
+}
+
+/* ethtool entry points. */
+static const struct ethtool_ops vector_net_ethtool_ops = {
+       .get_drvinfo    = vector_net_get_drvinfo,
+       .get_link       = ethtool_op_get_link,
+       .get_ts_info    = ethtool_op_get_ts_info,
+       .get_ringparam  = vector_get_ringparam,
+       .get_strings    = vector_get_strings,
+       .get_sset_count = vector_get_sset_count,
+       .get_ethtool_stats = vector_get_ethtool_stats,
+       .get_coalesce   = vector_get_coalesce,
+       .set_coalesce   = vector_set_coalesce,
+};
+
+
+/* netdev entry points. */
+static const struct net_device_ops vector_netdev_ops = {
+       .ndo_open               = vector_net_open,
+       .ndo_stop               = vector_net_close,
+       .ndo_start_xmit         = vector_net_start_xmit,
+       .ndo_set_rx_mode        = vector_net_set_multicast_list,
+       .ndo_tx_timeout         = vector_net_tx_timeout,
+       .ndo_set_mac_address    = eth_mac_addr,
+       .ndo_validate_addr      = eth_validate_addr,
+       .ndo_fix_features       = vector_fix_features,
+       .ndo_set_features       = vector_set_features,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller = vector_net_poll_controller,
+#endif
+};
+
+
+/* TX coalesce timer: same action as the TX tasklet - flush the queue. */
+static void vector_timer_expire(struct timer_list *t)
+{
+       struct vector_private *vp = from_timer(vp, t, tl);
+
+       vp->estats.tx_kicks++;
+       vector_send(vp->tx_queue);
+}
+
+/* Create and register one vecN net_device plus its platform device
+ * from a parsed command-line/mconsole argument list.
+ */
+static void vector_eth_configure(
+               int n,
+               struct arglist *def
+       )
+{
+       struct vector_device *device;
+       struct net_device *dev;
+       struct vector_private *vp;
+       int err;
+
+       device = kzalloc(sizeof(*device), GFP_KERNEL);
+       if (device == NULL) {
+               printk(KERN_ERR "eth_configure failed to allocate struct "
+                                "vector_device\n");
+               return;
+       }
+       dev = alloc_etherdev(sizeof(struct vector_private));
+       if (dev == NULL) {
+               printk(KERN_ERR "eth_configure: failed to allocate struct "
+                                "net_device for vec%d\n", n);
+               goto out_free_device;
+       }
+
+       dev->mtu = get_mtu(def);
+
+       INIT_LIST_HEAD(&device->list);
+       device->unit = n;
+
+       /* If this name ends up conflicting with an existing registered
+        * netdevice, that is OK, register_netdev{,ice}() will notice this
+        * and fail.
+        */
+       snprintf(dev->name, sizeof(dev->name), "vec%d", n);
+       uml_net_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
+       vp = netdev_priv(dev);
+
+       /* sysfs register */
+       if (!driver_registered) {
+               platform_driver_register(&uml_net_driver);
+               driver_registered = 1;
+       }
+       device->pdev.id = n;
+       device->pdev.name = DRIVER_NAME;
+       device->pdev.dev.release = vector_device_release;
+       dev_set_drvdata(&device->pdev.dev, device);
+       if (platform_device_register(&device->pdev))
+               goto out_free_netdev;
+       SET_NETDEV_DEV(dev, &device->pdev.dev);
+
+       device->dev = dev;
+
+       /* Compound-literal assignment initialises every field; anything
+        * not listed is zeroed.
+        */
+       *vp = ((struct vector_private)
+               {
+               .list                   = LIST_HEAD_INIT(vp->list),
+               .dev                    = dev,
+               .unit                   = n,
+               .options                = get_transport_options(def),
+               .rx_irq                 = 0,
+               .tx_irq                 = 0,
+               .parsed                 = def,
+               .max_packet             = get_mtu(def) + ETH_HEADER_OTHER,
+               /* TODO - we need to calculate headroom so that ip header
+                * is 16 byte aligned all the time
+                */
+               .headroom               = get_headroom(def),
+               .form_header            = NULL,
+               .verify_header          = NULL,
+               .header_rxbuffer        = NULL,
+               .header_txbuffer        = NULL,
+               .header_size            = 0,
+               .rx_header_size         = 0,
+               .rexmit_scheduled       = false,
+               .opened                 = false,
+               .transport_data         = NULL,
+               .in_write_poll          = false,
+               .coalesce               = 2,
+               .req_size               = get_req_size(def)
+               });
+
+       dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
+       tasklet_init(&vp->tx_poll, vector_tx_poll, (unsigned long)vp);
+       INIT_WORK(&vp->reset_tx, vector_reset_tx);
+
+       timer_setup(&vp->tl, vector_timer_expire, 0);
+       spin_lock_init(&vp->lock);
+
+       /* FIXME */
+       dev->netdev_ops = &vector_netdev_ops;
+       dev->ethtool_ops = &vector_net_ethtool_ops;
+       dev->watchdog_timeo = (HZ >> 1);
+       /* primary IRQ - fixme */
+       dev->irq = 0; /* we will adjust this once opened */
+
+       rtnl_lock();
+       err = register_netdevice(dev);
+       rtnl_unlock();
+       if (err)
+               goto out_undo_user_init;
+
+       spin_lock(&vector_devices_lock);
+       list_add(&device->list, &vector_devices);
+       spin_unlock(&vector_devices_lock);
+
+       return;
+
+out_undo_user_init:
+       /* NOTE(review): nothing is actually undone here - dev leaks and
+        * the platform device stays registered. Needs
+        * platform_device_unregister() + free_netdev(); confirm against
+        * vector_device_release() semantics before fixing.
+        */
+       return;
+out_free_netdev:
+       free_netdev(dev);
+out_free_device:
+       kfree(device);
+}
+
+
+
+
+/*
+ * Invoked late in the init
+ */
+
+/* Late init: instantiate a device for every "vec" command-line entry
+ * stashed by vector_setup(). Uses the idiomatic list_for_each_entry()
+ * instead of open-coded list_for_each() + list_entry().
+ */
+static int __init vector_init(void)
+{
+       struct vector_cmd_line_arg *def;
+       struct arglist *parsed;
+
+       list_for_each_entry(def, &vec_cmd_line, list) {
+               parsed = uml_parse_vector_ifspec(def->arguments);
+               if (parsed != NULL)
+                       vector_eth_configure(def->unit, parsed);
+       }
+       return 0;
+}
+
+
+/* Invoked at initial argument parsing, only stores
+ * arguments until a proper vector_init is called
+ * later
+ */
+
+/* Parse "vecN:..." early; stash the raw argument string for vector_init().
+ * Returns 1 in all cases so the option is consumed and not passed on.
+ */
+static int __init vector_setup(char *str)
+{
+       char *error;
+       int n, err;
+       struct vector_cmd_line_arg *new;
+
+       err = vector_parse(str, &n, &str, &error);
+       if (err) {
+               printk(KERN_ERR "vector_setup - Couldn't parse '%s' : %s\n",
+                                str, error);
+               return 1;
+       }
+       /* alloc_bootmem() panics on failure, so no NULL check is needed. */
+       new = alloc_bootmem(sizeof(*new));
+       INIT_LIST_HEAD(&new->list);
+       new->unit = n;
+       new->arguments = str;
+       list_add_tail(&new->list, &vec_cmd_line);
+       return 1;
+}
+
+__setup("vec", vector_setup);
+__uml_help(vector_setup,
+"vec[0-9]+:<option>=<value>,<option>=<value>\n"
+"       Configure a vector io network device.\n\n"
+);
+
+/* Devices are created late, after IRQ and transport infrastructure is up. */
+late_initcall(vector_init);
+
+/* mconsole hooks: runtime "vec" device config/id/remove. */
+static struct mc_device vector_mc = {
+       .list           = LIST_HEAD_INIT(vector_mc.list),
+       .name           = "vec",
+       .config         = vector_config,
+       .get_config     = NULL,
+       .id             = vector_id,
+       .remove         = vector_remove,
+};
+
+#ifdef CONFIG_INET
+/* inetaddr notifier: currently a no-op stub kept as a hook point. */
+static int vector_inetaddr_event(
+       struct notifier_block *this,
+       unsigned long event,
+       void *ptr)
+{
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block vector_inetaddr_notifier = {
+       .notifier_call          = vector_inetaddr_event,
+};
+
+static void inet_register(void)
+{
+       register_inetaddr_notifier(&vector_inetaddr_notifier);
+}
+#else
+/* No CONFIG_INET: nothing to register. */
+static inline void inet_register(void)
+{
+}
+#endif
+
+/* Basic init: register the mconsole device and the inetaddr stub. */
+static int vector_net_init(void)
+{
+       mconsole_register_dev(&vector_mc);
+       inet_register();
+       return 0;
+}
+
+__initcall(vector_net_init);
+
+
+
diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h
new file mode 100644 (file)
index 0000000..0b0a767
--- /dev/null
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_VECTOR_KERN_H
+#define __UM_VECTOR_KERN_H
+
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/list.h>
+#include <linux/ctype.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include "vector_user.h"
+
+/* Queue structure specially adapted for multiple enqueue/dequeue
+ * in a mmsgrecv/mmsgsend context
+ */
+
+/* Dequeue method */
+
+#define QUEUE_SENDMSG 0
+#define QUEUE_SENDMMSG 1
+
+#define VECTOR_RX 1
+#define VECTOR_TX (1 << 1)
+#define VECTOR_BPF (1 << 2)
+#define VECTOR_QDISC_BYPASS (1 << 3)
+
+#define ETH_MAX_PACKET 1500
+#define ETH_HEADER_OTHER 32 /* just in case someone decides to go mad on QnQ */
+
+/* Ring of mmsg headers plus matching skb pointers. Separate head/tail
+ * locks - presumably so producer and consumer can run concurrently;
+ * see the queue helpers for the exact protocol.
+ */
+struct vector_queue {
+       struct mmsghdr *mmsg_vector;
+       void **skbuff_vector;
+        /* backlink to device which owns us */
+       struct net_device *dev;
+       spinlock_t head_lock;
+       spinlock_t tail_lock;
+       int queue_depth, head, tail, max_depth, max_iov_frags;
+       short options;
+};
+
+/* Extended driver counters; copied verbatim into the ethtool -S buffer,
+ * so this must stay a flat sequence of uint64_t fields.
+ */
+struct vector_estats {
+       uint64_t rx_queue_max;
+       uint64_t rx_queue_running_average;
+       uint64_t tx_queue_max;
+       uint64_t tx_queue_running_average;
+       uint64_t rx_encaps_errors;
+       uint64_t tx_timeout_count;
+       uint64_t tx_restart_queue;
+       uint64_t tx_kicks;
+       uint64_t tx_flow_control_xon;
+       uint64_t tx_flow_control_xoff;
+       uint64_t rx_csum_offload_good;
+       uint64_t rx_csum_offload_errors;
+       uint64_t sg_ok;
+       uint64_t sg_linearized;
+};
+
+#define VERIFY_HEADER_NOK -1
+#define VERIFY_HEADER_OK 0
+#define VERIFY_CSUM_OK 1
+
+/* Per-device state, allocated as net_device private data. */
+struct vector_private {
+       struct list_head list;
+       spinlock_t lock;
+       struct net_device *dev;
+
+       int unit;
+
+       /* Timeout timer in TX */
+
+       struct timer_list tl;
+
+       /* Scheduled "remove device" work */
+       struct work_struct reset_tx;
+       struct vector_fds *fds;
+
+       /* NULL unless the corresponding VECTOR_RX/VECTOR_TX option is set */
+       struct vector_queue *rx_queue;
+       struct vector_queue *tx_queue;
+
+       /* 0 when not allocated (see vector_net_open/close) */
+       int rx_irq;
+       int tx_irq;
+
+       struct arglist *parsed;
+
+       void *transport_data; /* transport specific params if needed */
+
+       int max_packet;
+       int req_size; /* different from max packet - used for TSO */
+       int headroom;
+
+       int options;
+
+       /* remote address if any - some transports will leave this as null */
+
+       int header_size;
+       int rx_header_size;
+       int coalesce; /* TX flush timer period, in jiffies */
+
+       void *header_rxbuffer;
+       void *header_txbuffer;
+
+       /* transport-specific encapsulation build/check hooks */
+       int (*form_header)(uint8_t *header,
+               struct sk_buff *skb, struct vector_private *vp);
+       int (*verify_header)(uint8_t *header,
+               struct sk_buff *skb, struct vector_private *vp);
+
+       spinlock_t stats_lock;
+
+       struct tasklet_struct tx_poll;
+       bool rexmit_scheduled;
+       bool opened;
+       bool in_write_poll;
+
+       /* ethtool stats */
+
+       struct vector_estats estats;
+       void *bpf;
+
+       /* NOTE(review): old-style zero-length array; C99 flexible member
+        * "char user[];" is the preferred spelling.
+        */
+       char user[0];
+};
+
+extern int build_transport_data(struct vector_private *vp);
+
+#endif
diff --git a/arch/um/drivers/vector_transports.c b/arch/um/drivers/vector_transports.c
new file mode 100644 (file)
index 0000000..9065047
--- /dev/null
@@ -0,0 +1,458 @@
+/*
+ * Copyright (C) 2017 - Cambridge Greys Limited
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
+ * Licensed under the GPL.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <asm/byteorder.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/virtio_net.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_byteorder.h>
+#include <linux/netdev_features.h>
+#include "vector_user.h"
+#include "vector_kern.h"
+
+#define GOOD_LINEAR 512
+#define GSO_ERROR "Incoming GSO frames and GRO disabled on the interface"
+
+/* First 32 bits of a GRE header: flags/version + protocol (ARP type). */
+struct gre_minimal_header {
+       uint16_t header;
+       uint16_t arptype;
+};
+
+
+/* Per-device GRE transport state; offsets are into the encap header. */
+struct uml_gre_data {
+       uint32_t rx_key;
+       uint32_t tx_key;
+       uint32_t sequence;
+
+       bool ipv6;
+       bool has_sequence;
+       bool pin_sequence;
+       bool checksum;
+       bool key;
+       struct gre_minimal_header expected_header;
+
+       uint32_t checksum_offset;
+       uint32_t key_offset;
+       uint32_t sequence_offset;
+
+};
+
+/* Per-device L2TPv3 transport state; offsets are into the encap header. */
+struct uml_l2tpv3_data {
+       uint64_t rx_cookie;
+       uint64_t tx_cookie;
+       uint64_t rx_session;
+       uint64_t tx_session;
+       uint32_t counter;
+
+       bool udp;
+       bool ipv6;
+       bool has_counter;
+       bool pin_counter;
+       bool cookie;
+       bool cookie_is_64;
+
+       uint32_t cookie_offset;
+       uint32_t session_offset;
+       uint32_t counter_offset;
+};
+
+/* Write the L2TPv3 encapsulation header in front of an outgoing frame. */
+static int l2tpv3_form_header(uint8_t *header,
+       struct sk_buff *skb, struct vector_private *vp)
+{
+       struct uml_l2tpv3_data *td = vp->transport_data;
+       uint32_t *counter;
+
+       if (td->udp)
+               *(uint32_t *) header = cpu_to_be32(L2TPV3_DATA_PACKET);
+       /* session/cookie were converted to big-endian at build time,
+        * so they are written as-is here.
+        */
+       (*(uint32_t *) (header + td->session_offset)) = td->tx_session;
+
+       if (td->cookie) {
+               if (td->cookie_is_64)
+                       (*(uint64_t *)(header + td->cookie_offset)) =
+                               td->tx_cookie;
+               else
+                       (*(uint32_t *)(header + td->cookie_offset)) =
+                               td->tx_cookie;
+       }
+       if (td->has_counter) {
+               counter = (uint32_t *)(header + td->counter_offset);
+               if (td->pin_counter) {
+                       *counter = 0;
+               } else {
+                       td->counter++;
+                       *counter = cpu_to_be32(td->counter);
+               }
+       }
+       return 0;
+}
+
+/* Write the GRE encapsulation header in front of an outgoing frame. */
+static int gre_form_header(uint8_t *header,
+               struct sk_buff *skb, struct vector_private *vp)
+{
+       struct uml_gre_data *td = vp->transport_data;
+       uint32_t *sequence;
+       /* flags/version + protocol, prebuilt at transport setup */
+       *((uint32_t *) header) = *((uint32_t *) &td->expected_header);
+       if (td->key)
+               (*(uint32_t *) (header + td->key_offset)) = td->tx_key;
+       if (td->has_sequence) {
+               sequence = (uint32_t *)(header + td->sequence_offset);
+               if (td->pin_sequence)
+                       *sequence = 0;
+               else
+                       *sequence = cpu_to_be32(++td->sequence);
+       }
+       return 0;
+}
+
+/* Build a virtio_net header describing the skb's offload state. */
+static int raw_form_header(uint8_t *header,
+               struct sk_buff *skb, struct vector_private *vp)
+{
+       struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header;
+
+       virtio_net_hdr_from_skb(
+               skb,
+               vheader,
+               virtio_legacy_is_little_endian(),
+               false
+       );
+
+       return 0;
+}
+
+/* Validate the L2TPv3 header of a received frame.
+ * Returns VERIFY_HEADER_OK (0) or VERIFY_HEADER_NOK (-1).
+ */
+static int l2tpv3_verify_header(
+       uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
+{
+       struct uml_l2tpv3_data *td = vp->transport_data;
+       uint32_t *session;
+       uint64_t cookie;
+
+       if ((!td->udp) && (!td->ipv6))
+               header += sizeof(struct iphdr) /* fix for ipv4 raw */;
+
+       /* we do not do a strict check for "data" packets as per
+        * the RFC spec because the pure IP spec does not have
+        * that anyway.
+        */
+
+       if (td->cookie) {
+               if (td->cookie_is_64)
+                       cookie = *(uint64_t *)(header + td->cookie_offset);
+               else
+                       cookie = *(uint32_t *)(header + td->cookie_offset);
+               if (cookie != td->rx_cookie) {
+                       if (net_ratelimit())
+                               netdev_err(vp->dev, "uml_l2tpv3: unknown cookie id");
+                       return -1;
+               }
+       }
+       session = (uint32_t *) (header + td->session_offset);
+       if (*session != td->rx_session) {
+               if (net_ratelimit())
+                       netdev_err(vp->dev, "uml_l2tpv3: session mismatch");
+               return -1;
+       }
+       return 0;
+}
+
+/* Validate the GRE header of a received frame.
+ * Returns VERIFY_HEADER_OK (0) or VERIFY_HEADER_NOK (-1).
+ */
+static int gre_verify_header(
+       uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
+{
+
+       uint32_t key;
+       struct uml_gre_data *td = vp->transport_data;
+
+       if (!td->ipv6)
+               header += sizeof(struct iphdr) /* fix for ipv4 raw */;
+
+       if (*((uint32_t *) header) != *((uint32_t *) &td->expected_header)) {
+               if (net_ratelimit())
+                       netdev_err(vp->dev, "header type disagreement, expecting %0x, got %0x",
+                               *((uint32_t *) &td->expected_header),
+                               *((uint32_t *) header)
+                       );
+               return -1;
+       }
+
+       if (td->key) {
+               key = (*(uint32_t *)(header + td->key_offset));
+               if (key != td->rx_key) {
+                       if (net_ratelimit())
+                               netdev_err(vp->dev, "unknown key id %0x, expecting %0x",
+                                               key, td->rx_key);
+                       return -1;
+               }
+       }
+       return 0;
+}
+
+/* Consume the virtio_net header of a received frame.
+ * Returns 1 (VERIFY_CSUM_OK) when the host already validated the
+ * checksum, 0 otherwise.
+ */
+static int raw_verify_header(
+       uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
+{
+       struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header;
+
+       /* GSO frame arrived while buffers are not GRO-sized (65536):
+        * warn, since the frame may not fit.
+        */
+       if ((vheader->gso_type != VIRTIO_NET_HDR_GSO_NONE) &&
+               (vp->req_size != 65536)) {
+               if (net_ratelimit())
+                       netdev_err(
+                               vp->dev,
+                               GSO_ERROR
+               );
+       }
+       if ((vheader->flags & VIRTIO_NET_HDR_F_DATA_VALID) > 0)
+               return 1;
+
+       virtio_net_hdr_to_skb(skb, vheader, virtio_legacy_is_little_endian());
+       return 0;
+}
+
+/* Fetch an unsigned int argument from the ifspec.
+ * Returns true only when the parameter is present and parses cleanly.
+ * Uses kstrtouint() - the original passed the unsigned int *result to
+ * kstrtoint() through an incompatible pointer type.
+ */
+static bool get_uint_param(
+       struct arglist *def, char *param, unsigned int *result)
+{
+       char *arg = uml_vector_fetch_arg(def, param);
+
+       if (arg != NULL) {
+               if (kstrtouint(arg, 0, result) == 0)
+                       return true;
+       }
+       return false;
+}
+
+/* Fetch an unsigned long argument from the ifspec.
+ * Returns true only when the parameter is present and parses cleanly.
+ * The original contained a stray second "return true;" which made any
+ * present-but-malformed value report success with *result uninitialised.
+ */
+static bool get_ulong_param(
+       struct arglist *def, char *param, unsigned long *result)
+{
+       char *arg = uml_vector_fetch_arg(def, param);
+
+       if (arg != NULL) {
+               if (kstrtoul(arg, 0, result) == 0)
+                       return true;
+       }
+       return false;
+}
+
+/* Configure the GRE transport from the parsed ifspec. */
+static int build_gre_transport_data(struct vector_private *vp)
+{
+       struct uml_gre_data *td;
+       unsigned int temp_int;
+       unsigned int temp_rx;
+       unsigned int temp_tx;
+
+       vp->transport_data = kmalloc(sizeof(struct uml_gre_data), GFP_KERNEL);
+       if (vp->transport_data == NULL)
+               return -ENOMEM;
+       td = vp->transport_data;
+       td->sequence = 0;
+
+       td->expected_header.arptype = GRE_IRB;
+       td->expected_header.header = 0;
+
+       vp->form_header = &gre_form_header;
+       vp->verify_header = &gre_verify_header;
+       vp->header_size = 4;
+       td->key_offset = 4;
+       td->sequence_offset = 4;
+       td->checksum_offset = 4;
+
+       td->ipv6 = false;
+       if (get_uint_param(vp->parsed, "v6", &temp_int)) {
+               if (temp_int > 0)
+                       td->ipv6 = true;
+       }
+       td->key = false;
+       if (get_uint_param(vp->parsed, "rx_key", &temp_rx)) {
+               if (get_uint_param(vp->parsed, "tx_key", &temp_tx)) {
+                       td->key = true;
+                       td->expected_header.header |= GRE_MODE_KEY;
+                       td->rx_key = cpu_to_be32(temp_rx);
+                       td->tx_key = cpu_to_be32(temp_tx);
+                       vp->header_size += 4;
+                       td->sequence_offset += 4;
+               } else {
+                       return -EINVAL;
+               }
+       }
+
+       /* transport_data comes from kmalloc(), so every flag must be set
+        * explicitly. The original assigned td->sequence (a counter,
+        * already zeroed above) instead of td->has_sequence, leaving
+        * has_sequence/pin_sequence/checksum as heap garbage when the
+        * corresponding parameters were absent.
+        */
+       td->has_sequence = false;
+       td->pin_sequence = false;
+       td->checksum = false;
+       if (get_uint_param(vp->parsed, "sequence", &temp_int)) {
+               if (temp_int > 0) {
+                       vp->header_size += 4;
+                       td->has_sequence = true;
+                       td->expected_header.header |= GRE_MODE_SEQUENCE;
+                       if (get_uint_param(
+                               vp->parsed, "pin_sequence", &temp_int)) {
+                               if (temp_int > 0)
+                                       td->pin_sequence = true;
+                       }
+               }
+       }
+       vp->rx_header_size = vp->header_size;
+       if (!td->ipv6)
+               vp->rx_header_size += sizeof(struct iphdr);
+       return 0;
+}
+
+/* Configure the L2TPv3 transport from the parsed ifspec. */
+static int build_l2tpv3_transport_data(struct vector_private *vp)
+{
+
+       struct uml_l2tpv3_data *td;
+       unsigned int temp_int, temp_rxs, temp_txs;
+       unsigned long temp_rx;
+       unsigned long temp_tx;
+
+       vp->transport_data = kmalloc(
+               sizeof(struct uml_l2tpv3_data), GFP_KERNEL);
+
+       if (vp->transport_data == NULL)
+               return -ENOMEM;
+
+       td = vp->transport_data;
+
+       vp->form_header = &l2tpv3_form_header;
+       vp->verify_header = &l2tpv3_verify_header;
+       td->counter = 0;
+
+       vp->header_size = 4;
+       td->session_offset = 0;
+       td->cookie_offset = 4;
+       td->counter_offset = 4;
+
+
+       td->ipv6 = false;
+       if (get_uint_param(vp->parsed, "v6", &temp_int)) {
+               if (temp_int > 0)
+                       td->ipv6 = true;
+       }
+
+       if (get_uint_param(vp->parsed, "rx_session", &temp_rxs)) {
+               if (get_uint_param(vp->parsed, "tx_session", &temp_txs)) {
+                       td->tx_session = cpu_to_be32(temp_txs);
+                       td->rx_session = cpu_to_be32(temp_rxs);
+               } else {
+                       return -EINVAL;
+               }
+       } else {
+               return -EINVAL;
+       }
+
+       td->cookie_is_64  = false;
+       if (get_uint_param(vp->parsed, "cookie64", &temp_int)) {
+               if (temp_int > 0)
+                       td->cookie_is_64  = true;
+       }
+       td->cookie = false;
+       if (get_ulong_param(vp->parsed, "rx_cookie", &temp_rx)) {
+               if (get_ulong_param(vp->parsed, "tx_cookie", &temp_tx)) {
+                       td->cookie = true;
+                       if (td->cookie_is_64) {
+                               td->rx_cookie = cpu_to_be64(temp_rx);
+                               td->tx_cookie = cpu_to_be64(temp_tx);
+                               vp->header_size += 8;
+                               td->counter_offset += 8;
+                       } else {
+                               td->rx_cookie = cpu_to_be32(temp_rx);
+                               td->tx_cookie = cpu_to_be32(temp_tx);
+                               vp->header_size += 4;
+                               td->counter_offset += 4;
+                       }
+               } else {
+                       return -EINVAL;
+               }
+       }
+
+       /* transport_data is kmalloc()ed: flags that are only ever set to
+        * true below must be cleared first. The original never initialised
+        * pin_counter or udp, leaving them as heap garbage when those
+        * parameters were absent.
+        */
+       td->has_counter = false;
+       td->pin_counter = false;
+       if (get_uint_param(vp->parsed, "counter", &temp_int)) {
+               if (temp_int > 0) {
+                       td->has_counter = true;
+                       vp->header_size += 4;
+                       if (get_uint_param(
+                               vp->parsed, "pin_counter", &temp_int)) {
+                               if (temp_int > 0)
+                                       td->pin_counter = true;
+                       }
+               }
+       }
+
+       td->udp = false;
+       if (get_uint_param(vp->parsed, "udp", &temp_int)) {
+               if (temp_int > 0) {
+                       td->udp = true;
+                       vp->header_size += 4;
+                       td->counter_offset += 4;
+                       td->session_offset += 4;
+                       td->cookie_offset += 4;
+               }
+       }
+
+       vp->rx_header_size = vp->header_size;
+       if ((!td->ipv6) && (!td->udp))
+               vp->rx_header_size += sizeof(struct iphdr);
+
+       return 0;
+}
+
+/* Configure the raw-socket transport; enable vnet headers on both fds
+ * when the host supports them, otherwise fall back to plain frames.
+ */
+static int build_raw_transport_data(struct vector_private *vp)
+{
+       if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) {
+               if (!uml_raw_enable_vnet_headers(vp->fds->tx_fd))
+                       return -1;
+               vp->form_header = &raw_form_header;
+               vp->verify_header = &raw_verify_header;
+               vp->header_size = sizeof(struct virtio_net_hdr);
+               vp->rx_header_size = sizeof(struct virtio_net_hdr);
+               vp->dev->hw_features |= (NETIF_F_TSO | NETIF_F_GRO);
+               vp->dev->features |=
+                       (NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
+                               NETIF_F_TSO | NETIF_F_GRO);
+               netdev_info(
+                       vp->dev,
+                       "raw: using vnet headers for tso and tx/rx checksum"
+               );
+       }
+       return 0;
+}
+
+/* Configure the tap transport (raw rx fd + tap tx fd).
+ * NOTE(review): features and hooks are set before the tx_fd tap check;
+ * if that check fails (-1) the advertised features stay set - confirm
+ * the caller's error handling covers this.
+ */
+static int build_tap_transport_data(struct vector_private *vp)
+{
+       if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) {
+               vp->form_header = &raw_form_header;
+               vp->verify_header = &raw_verify_header;
+               vp->header_size = sizeof(struct virtio_net_hdr);
+               vp->rx_header_size = sizeof(struct virtio_net_hdr);
+               vp->dev->hw_features |=
+                       (NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
+               vp->dev->features |=
+                       (NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
+                               NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
+               netdev_info(
+                       vp->dev,
+                       "tap/raw: using vnet headers for tso and tx/rx checksum"
+               );
+       } else {
+               return 0; /* do not try to enable tap too if raw failed */
+       }
+       if (uml_tap_enable_vnet_headers(vp->fds->tx_fd))
+               return 0;
+       return -1;
+}
+
+/* Dispatch transport-specific setup based on the "transport" argument
+ * (prefix match). Unknown transports are accepted with no encapsulation.
+ * NOTE(review): assumes "transport" is always present - strncmp() on a
+ * NULL return from uml_vector_fetch_arg() would crash; confirm callers
+ * guarantee it.
+ */
+int build_transport_data(struct vector_private *vp)
+{
+       char *transport = uml_vector_fetch_arg(vp->parsed, "transport");
+
+       if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0)
+               return build_gre_transport_data(vp);
+       if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0)
+               return build_l2tpv3_transport_data(vp);
+       if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
+               return build_raw_transport_data(vp);
+       if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
+               return build_tap_transport_data(vp);
+       return 0;
+}
+
diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c
new file mode 100644 (file)
index 0000000..4d6a78e
--- /dev/null
@@ -0,0 +1,590 @@
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <linux/if_tun.h>
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/ether.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <linux/virtio_net.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <os.h>
+#include <um_malloc.h>
+#include "vector_user.h"
+
+#define ID_GRE 0
+#define ID_L2TPV3 1
+#define ID_MAX 1
+
+#define TOKEN_IFNAME "ifname"
+
+#define TRANS_RAW "raw"
+#define TRANS_RAW_LEN strlen(TRANS_RAW)
+
+#define VNET_HDR_FAIL "could not enable vnet headers on fd %d"
+#define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s"
+#define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i"
+#define BPF_ATTACH_FAIL "Failed to attach filter size %d to %d, err %d\n"
+
+/* This is a very ugly, brute-force lookup, but it is done
+ * only once at initialization, so it is not worth using hashes
+ * or anything more intelligent
+ */
+
+char *uml_vector_fetch_arg(struct arglist *ifspec, char *token)
+{
+       int i;
+
+       for (i = 0; i < ifspec->numargs; i++) {
+               if (strcmp(ifspec->tokens[i], token) == 0)
+                       return ifspec->values[i];
+       }
+       return NULL;
+
+}
+
+struct arglist *uml_parse_vector_ifspec(char *arg)
+{
+       struct arglist *result;
+       int pos, len;
+       bool parsing_token = true, next_starts = true;
+
+       if (arg == NULL)
+               return NULL;
+       result = uml_kmalloc(sizeof(struct arglist), UM_GFP_KERNEL);
+       if (result == NULL)
+               return NULL;
+       result->numargs = 0;
+       len = strlen(arg);
+       for (pos = 0; pos < len; pos++) {
+               if (next_starts) {
+                       if (result->numargs >= MAXVARGS)
+                               goto cleanup;
+                       if (parsing_token)
+                               result->tokens[result->numargs] = arg + pos;
+                       else
+                               result->values[result->numargs++] = arg + pos;
+                       next_starts = false;
+               }
+               if (*(arg + pos) == '=') {
+                       if (parsing_token)
+                               parsing_token = false;
+                       else
+                               goto cleanup;
+                       next_starts = true;
+                       (*(arg + pos)) = '\0';
+               }
+               if (*(arg + pos) == ',') {
+                       parsing_token = true;
+                       next_starts = true;
+                       (*(arg + pos)) = '\0';
+               }
+       }
+       return result;
+cleanup:
+       printk(UM_KERN_ERR "vector_setup - Couldn't parse '%s'\n", arg);
+       kfree(result);
+       return NULL;
+}
+
+/*
+ * Socket/FD configuration functions. These return a structure
+ * of rx and tx descriptors to cover cases where these are not
+ * the same (e.g. read via raw socket and write via tap).
+ */
+
+#define PATH_NET_TUN "/dev/net/tun"
+
+static struct vector_fds *user_init_tap_fds(struct arglist *ifspec)
+{
+       struct ifreq ifr;
+       int fd = -1;
+       struct sockaddr_ll sock;
+       int err = -ENOMEM, offload;
+       char *iface;
+       struct vector_fds *result = NULL;
+
+       iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
+       if (iface == NULL) {
+               printk(UM_KERN_ERR "uml_tap: failed to parse interface spec\n");
+               goto tap_cleanup;
+       }
+
+       result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+       if (result == NULL) {
+               printk(UM_KERN_ERR "uml_tap: failed to allocate file descriptors\n");
+               goto tap_cleanup;
+       }
+       result->rx_fd = -1;
+       result->tx_fd = -1;
+       result->remote_addr = NULL;
+       result->remote_addr_size = 0;
+
+       /* TAP */
+
+       fd = open(PATH_NET_TUN, O_RDWR);
+       if (fd < 0) {
+               printk(UM_KERN_ERR "uml_tap: failed to open tun device\n");
+               goto tap_cleanup;
+       }
+       result->tx_fd = fd;
+       memset(&ifr, 0, sizeof(ifr));
+       ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
+       strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+
+       err = ioctl(fd, TUNSETIFF, (void *) &ifr);
+       if (err != 0) {
+               printk(UM_KERN_ERR "uml_tap: failed to select tap interface\n");
+               goto tap_cleanup;
+       }
+
+       offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
+       ioctl(fd, TUNSETOFFLOAD, offload);
+
+       /* RAW */
+
+       fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+       if (fd == -1) {
+               printk(UM_KERN_ERR
+                       "uml_tap: failed to create socket: %i\n", -errno);
+               goto tap_cleanup;
+       }
+       result->rx_fd = fd;
+       memset(&ifr, 0, sizeof(ifr));
+       strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+       if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) {
+               printk(UM_KERN_ERR
+                       "uml_tap: failed to set interface: %i\n", -errno);
+               goto tap_cleanup;
+       }
+
+       sock.sll_family = AF_PACKET;
+       sock.sll_protocol = htons(ETH_P_ALL);
+       sock.sll_ifindex = ifr.ifr_ifindex;
+
+       if (bind(fd,
+               (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
+               printk(UM_KERN_ERR
+                       "user_init_tap: failed to bind raw pair, err %d\n",
+                               -errno);
+               goto tap_cleanup;
+       }
+       return result;
+tap_cleanup:
+       printk(UM_KERN_ERR "user_init_tap: init failed, error %d", err);
+       if (result != NULL) {
+               if (result->rx_fd >= 0)
+                       os_close_file(result->rx_fd);
+               if (result->tx_fd >= 0)
+                       os_close_file(result->tx_fd);
+               kfree(result);
+       }
+       return NULL;
+}
+
+
+static struct vector_fds *user_init_raw_fds(struct arglist *ifspec)
+{
+       struct ifreq ifr;
+       int rxfd = -1, txfd = -1;
+       struct sockaddr_ll sock;
+       int err = -ENOMEM;
+       char *iface;
+       struct vector_fds *result = NULL;
+
+       iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
+       if (iface == NULL)
+               goto cleanup;
+
+       rxfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+       if (rxfd == -1) {
+               err = -errno;
+               goto cleanup;
+       }
+       txfd = socket(AF_PACKET, SOCK_RAW, 0); /* Turn off RX on this fd */
+       if (txfd == -1) {
+               err = -errno;
+               goto cleanup;
+       }
+       memset(&ifr, 0, sizeof(ifr));
+       strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+       if (ioctl(rxfd, SIOCGIFINDEX, (void *) &ifr) < 0) {
+               err = -errno;
+               goto cleanup;
+       }
+
+       sock.sll_family = AF_PACKET;
+       sock.sll_protocol = htons(ETH_P_ALL);
+       sock.sll_ifindex = ifr.ifr_ifindex;
+
+       if (bind(rxfd,
+               (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
+               err = -errno;
+               goto cleanup;
+       }
+
+       sock.sll_family = AF_PACKET;
+       sock.sll_protocol = htons(ETH_P_IP);
+       sock.sll_ifindex = ifr.ifr_ifindex;
+
+       if (bind(txfd,
+               (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
+               err = -errno;
+               goto cleanup;
+       }
+
+       result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+       if (result == NULL)
+               goto cleanup;
+       result->rx_fd = rxfd;
+       result->tx_fd = txfd;
+       result->remote_addr = NULL;
+       result->remote_addr_size = 0;
+       return result;
+cleanup:
+       printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err);
+       if (rxfd >= 0)
+               os_close_file(rxfd);
+       if (txfd >= 0)
+               os_close_file(txfd);
+       if (result != NULL)
+               kfree(result);
+       return NULL;
+}
+
+
+bool uml_raw_enable_qdisc_bypass(int fd)
+{
+       int optval = 1;
+
+       if (setsockopt(fd,
+               SOL_PACKET, PACKET_QDISC_BYPASS,
+               &optval, sizeof(optval)) != 0) {
+               return false;
+       }
+       return true;
+}
+
+bool uml_raw_enable_vnet_headers(int fd)
+{
+       int optval = 1;
+
+       if (setsockopt(fd,
+               SOL_PACKET, PACKET_VNET_HDR,
+               &optval, sizeof(optval)) != 0) {
+               printk(UM_KERN_INFO VNET_HDR_FAIL, fd);
+               return false;
+       }
+       return true;
+}
+bool uml_tap_enable_vnet_headers(int fd)
+{
+       unsigned int features;
+       int len = sizeof(struct virtio_net_hdr);
+
+       if (ioctl(fd, TUNGETFEATURES, &features) == -1) {
+               printk(UM_KERN_INFO TUN_GET_F_FAIL, strerror(errno));
+               return false;
+       }
+       if ((features & IFF_VNET_HDR) == 0) {
+               printk(UM_KERN_INFO "tapraw: No VNET HEADER support");
+               return false;
+       }
+       ioctl(fd, TUNSETVNETHDRSZ, &len);
+       return true;
+}
+
+static struct vector_fds *user_init_socket_fds(struct arglist *ifspec, int id)
+{
+       int err = -ENOMEM;
+       int fd = -1, gairet;
+       struct addrinfo srchints;
+       struct addrinfo dsthints;
+       bool v6, udp;
+       char *value;
+       char *src, *dst, *srcport, *dstport;
+       struct addrinfo *gairesult = NULL;
+       struct vector_fds *result = NULL;
+
+
+       value = uml_vector_fetch_arg(ifspec, "v6");
+       v6 = false;
+       udp = false;
+       if (value != NULL) {
+               if (strtol((const char *) value, NULL, 10) > 0)
+                       v6 = true;
+       }
+
+       value = uml_vector_fetch_arg(ifspec, "udp");
+       if (value != NULL) {
+               if (strtol((const char *) value, NULL, 10) > 0)
+                       udp = true;
+       }
+       src = uml_vector_fetch_arg(ifspec, "src");
+       dst = uml_vector_fetch_arg(ifspec, "dst");
+       srcport = uml_vector_fetch_arg(ifspec, "srcport");
+       dstport = uml_vector_fetch_arg(ifspec, "dstport");
+
+       memset(&dsthints, 0, sizeof(dsthints));
+
+       if (v6)
+               dsthints.ai_family = AF_INET6;
+       else
+               dsthints.ai_family = AF_INET;
+
+       switch (id) {
+       case ID_GRE:
+               dsthints.ai_socktype = SOCK_RAW;
+               dsthints.ai_protocol = IPPROTO_GRE;
+               break;
+       case ID_L2TPV3:
+               if (udp) {
+                       dsthints.ai_socktype = SOCK_DGRAM;
+                       dsthints.ai_protocol = 0;
+               } else {
+                       dsthints.ai_socktype = SOCK_RAW;
+                       dsthints.ai_protocol = IPPROTO_L2TP;
+               }
+               break;
+       default:
+               printk(KERN_ERR "Unsupported socket type\n");
+               return NULL;
+       }
+       memcpy(&srchints, &dsthints, sizeof(struct addrinfo));
+
+       gairet = getaddrinfo(src, srcport, &srchints, &gairesult);
+       if ((gairet != 0) || (gairesult == NULL)) {
+               printk(UM_KERN_ERR
+                       "socket_open : could not resolve src, error = %s",
+                       gai_strerror(gairet)
+               );
+               return NULL;
+       }
+       fd = socket(gairesult->ai_family,
+               gairesult->ai_socktype, gairesult->ai_protocol);
+       if (fd == -1) {
+               printk(UM_KERN_ERR
+                       "socket_open : could not open socket, error = %d",
+                       -errno
+               );
+               goto cleanup;
+       }
+       if (bind(fd,
+               (struct sockaddr *) gairesult->ai_addr,
+               gairesult->ai_addrlen)) {
+               printk(UM_KERN_ERR L2TPV3_BIND_FAIL, errno);
+               goto cleanup;
+       }
+
+       if (gairesult != NULL)
+               freeaddrinfo(gairesult);
+
+       gairesult = NULL;
+
+       gairet = getaddrinfo(dst, dstport, &dsthints, &gairesult);
+       if ((gairet != 0) || (gairesult == NULL)) {
+               printk(UM_KERN_ERR
+                       "socket_open : could not resolve dst, error = %s",
+                       gai_strerror(gairet)
+               );
+               goto cleanup;
+       }
+
+       result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+       if (result == NULL)
+               goto cleanup;
+       result->rx_fd = fd;
+       result->tx_fd = fd;
+       result->remote_addr = uml_kmalloc(
+               gairesult->ai_addrlen, UM_GFP_KERNEL);
+       if (result->remote_addr == NULL)
+               goto cleanup;
+       result->remote_addr_size = gairesult->ai_addrlen;
+       memcpy(
+               result->remote_addr,
+               gairesult->ai_addr,
+               gairesult->ai_addrlen
+       );
+       freeaddrinfo(gairesult);
+       return result;
+cleanup:
+       if (gairesult != NULL)
+               freeaddrinfo(gairesult);
+       printk(UM_KERN_ERR "user_init_socket: init failed, error %d", err);
+       if (fd >= 0)
+               os_close_file(fd);
+       if (result != NULL) {
+               if (result->remote_addr != NULL)
+                       kfree(result->remote_addr);
+               kfree(result);
+       }
+       return NULL;
+}
+
+struct vector_fds *uml_vector_user_open(
+       int unit,
+       struct arglist *parsed
+)
+{
+       char *transport;
+
+       if (parsed == NULL) {
+               printk(UM_KERN_ERR "no parsed config for unit %d\n", unit);
+               return NULL;
+       }
+       transport = uml_vector_fetch_arg(parsed, "transport");
+       if (transport == NULL) {
+               printk(UM_KERN_ERR "missing transport for unit %d\n", unit);
+               return NULL;
+       }
+       if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
+               return user_init_raw_fds(parsed);
+       if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
+               return user_init_tap_fds(parsed);
+       if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0)
+               return user_init_socket_fds(parsed, ID_GRE);
+       if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0)
+               return user_init_socket_fds(parsed, ID_L2TPV3);
+       return NULL;
+}
+
+
+int uml_vector_sendmsg(int fd, void *hdr, int flags)
+{
+       int n;
+
+       CATCH_EINTR(n = sendmsg(fd, (struct msghdr *) hdr,  flags));
+       if ((n < 0) && (errno == EAGAIN))
+               return 0;
+       if (n >= 0)
+               return n;
+       else
+               return -errno;
+}
+
+int uml_vector_recvmsg(int fd, void *hdr, int flags)
+{
+       int n;
+
+       CATCH_EINTR(n = recvmsg(fd, (struct msghdr *) hdr,  flags));
+       if ((n < 0) && (errno == EAGAIN))
+               return 0;
+       if (n >= 0)
+               return n;
+       else
+               return -errno;
+}
+
+int uml_vector_writev(int fd, void *hdr, int iovcount)
+{
+       int n;
+
+       CATCH_EINTR(n = writev(fd, (struct iovec *) hdr,  iovcount));
+       if ((n < 0) && (errno == EAGAIN))
+               return 0;
+       if (n >= 0)
+               return n;
+       else
+               return -errno;
+}
+
+int uml_vector_sendmmsg(
+       int fd,
+       void *msgvec,
+       unsigned int vlen,
+       unsigned int flags)
+{
+       int n;
+
+       CATCH_EINTR(n = sendmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags));
+       if ((n < 0) && (errno == EAGAIN))
+               return 0;
+       if (n >= 0)
+               return n;
+       else
+               return -errno;
+}
+
+int uml_vector_recvmmsg(
+       int fd,
+       void *msgvec,
+       unsigned int vlen,
+       unsigned int flags)
+{
+       int n;
+
+       CATCH_EINTR(
+               n = recvmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags, 0));
+       if ((n < 0) && (errno == EAGAIN))
+               return 0;
+       if (n >= 0)
+               return n;
+       else
+               return -errno;
+}
+int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len)
+{
+       int err = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, bpf, bpf_len);
+
+       if (err < 0)
+               printk(KERN_ERR BPF_ATTACH_FAIL, bpf_len, fd, -errno);
+       return err;
+}
+
+#define DEFAULT_BPF_LEN 6
+
+void *uml_vector_default_bpf(int fd, void *mac)
+{
+       struct sock_filter *bpf;
+       uint32_t *mac1 = (uint32_t *)(mac + 2);
+       uint16_t *mac2 = (uint16_t *) mac;
+       struct sock_fprog bpf_prog = {
+               .len = DEFAULT_BPF_LEN,
+               .filter = NULL,
+       };
+
+       bpf = uml_kmalloc(
+               sizeof(struct sock_filter) * DEFAULT_BPF_LEN, UM_GFP_KERNEL);
+       if (bpf != NULL) {
+               bpf_prog.filter = bpf;
+               /* ld   [8] */
+               bpf[0] = (struct sock_filter){ 0x20, 0, 0, 0x00000008 };
+               /* jeq  #0xMAC[2-6] jt 2 jf 5*/
+               bpf[1] = (struct sock_filter){ 0x15, 0, 3, ntohl(*mac1)};
+               /* ldh  [6] */
+               bpf[2] = (struct sock_filter){ 0x28, 0, 0, 0x00000006 };
+               /* jeq  #0xMAC[0-1] jt 4 jf 5 */
+               bpf[3] = (struct sock_filter){ 0x15, 0, 1, ntohs(*mac2)};
+               /* ret  #0 */
+               bpf[4] = (struct sock_filter){ 0x6, 0, 0, 0x00000000 };
+               /* ret  #0x40000 */
+               bpf[5] = (struct sock_filter){ 0x6, 0, 0, 0x00040000 };
+               if (uml_vector_attach_bpf(
+                       fd, &bpf_prog, sizeof(struct sock_fprog)) < 0) {
+                       kfree(bpf);
+                       bpf = NULL;
+               }
+       }
+       return bpf;
+}
+
diff --git a/arch/um/drivers/vector_user.h b/arch/um/drivers/vector_user.h
new file mode 100644 (file)
index 0000000..d7cbff7
--- /dev/null
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_VECTOR_USER_H
+#define __UM_VECTOR_USER_H
+
+#define MAXVARGS       20
+
+#define TOKEN_IFNAME "ifname"
+
+#define TRANS_RAW "raw"
+#define TRANS_RAW_LEN strlen(TRANS_RAW)
+
+#define TRANS_TAP "tap"
+#define TRANS_TAP_LEN strlen(TRANS_TAP)
+
+
+#define TRANS_GRE "gre"
+#define TRANS_GRE_LEN strlen(TRANS_GRE)
+
+#define TRANS_L2TPV3 "l2tpv3"
+#define TRANS_L2TPV3_LEN strlen(TRANS_L2TPV3)
+
+#ifndef IPPROTO_GRE
+#define IPPROTO_GRE 0x2F
+#endif
+
+#define GRE_MODE_CHECKSUM      cpu_to_be16(8 << 12)    /* checksum */
+#define GRE_MODE_RESERVED      cpu_to_be16(4 << 12)    /* unused */
+#define GRE_MODE_KEY           cpu_to_be16(2 << 12)    /* KEY present */
+#define GRE_MODE_SEQUENCE      cpu_to_be16(1 << 12)    /* sequence */
+
+#define GRE_IRB cpu_to_be16(0x6558)
+
+#define L2TPV3_DATA_PACKET 0x30000
+
+/* IANA-assigned IP protocol ID for L2TPv3 */
+
+#ifndef IPPROTO_L2TP
+#define IPPROTO_L2TP 0x73
+#endif
+
+struct arglist {
+       int     numargs;
+       char    *tokens[MAXVARGS];
+       char    *values[MAXVARGS];
+};
+
+/* Separating read and write FDs allows us to have different
+ * rx and tx method. Example - read tap via raw socket using
+ * recvmmsg, write using legacy tap write calls
+ */
+
+struct vector_fds {
+       int rx_fd;
+       int tx_fd;
+       void *remote_addr;
+       int remote_addr_size;
+};
+
+#define VECTOR_READ    1
+#define VECTOR_WRITE   (1 << 1)
+#define VECTOR_HEADERS (1 << 2)
+
+extern struct arglist *uml_parse_vector_ifspec(char *arg);
+
+extern struct vector_fds *uml_vector_user_open(
+       int unit,
+       struct arglist *parsed
+);
+
+extern char *uml_vector_fetch_arg(
+       struct arglist *ifspec,
+       char *token
+);
+
+extern int uml_vector_recvmsg(int fd, void *hdr, int flags);
+extern int uml_vector_sendmsg(int fd, void *hdr, int flags);
+extern int uml_vector_writev(int fd, void *hdr, int iovcount);
+extern int uml_vector_sendmmsg(
+       int fd, void *msgvec,
+       unsigned int vlen,
+       unsigned int flags
+);
+extern int uml_vector_recvmmsg(
+       int fd,
+       void *msgvec,
+       unsigned int vlen,
+       unsigned int flags
+);
+extern void *uml_vector_default_bpf(int fd, void *mac);
+extern int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len);
+extern bool uml_raw_enable_qdisc_bypass(int fd);
+extern bool uml_raw_enable_vnet_headers(int fd);
+extern bool uml_tap_enable_vnet_headers(int fd);
+
+
+#endif
diff --git a/arch/um/include/asm/asm-prototypes.h b/arch/um/include/asm/asm-prototypes.h
new file mode 100644 (file)
index 0000000..5898a26
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/asm-prototypes.h>
index b5cdd3f91157dacb8b543fa6ff56f085aba7d2d1..49ed3e35b35adb1a5b679cfcbfe4f72d2cb97d09 100644 (file)
 #define XTERM_IRQ              13
 #define RANDOM_IRQ             14
 
+#ifdef CONFIG_UML_NET_VECTOR
+
+#define VECTOR_BASE_IRQ                15
+#define VECTOR_IRQ_SPACE       8
+
+#define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ)
+
+#else
+
 #define LAST_IRQ RANDOM_IRQ
+
+#endif
+
 #define NR_IRQS (LAST_IRQ + 1)
 
 #endif
index df5633053957d309d91fde9ea2ab743d79693e2e..a7a6120f19d55ae505114e419d1b841ea7f90cee 100644 (file)
@@ -7,6 +7,7 @@
 #define __IRQ_USER_H__
 
 #include <sysdep/ptrace.h>
+#include <stdbool.h>
 
 struct irq_fd {
        struct irq_fd *next;
@@ -15,10 +16,17 @@ struct irq_fd {
        int type;
        int irq;
        int events;
-       int current_events;
+       bool active;
+       bool pending;
+       bool purge;
 };
 
-enum { IRQ_READ, IRQ_WRITE };
+#define IRQ_READ  0
+#define IRQ_WRITE 1
+#define IRQ_NONE 2
+#define MAX_IRQ_TYPE (IRQ_NONE + 1)
+
+
 
 struct siginfo;
 extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
index 012ac87d49004ee5d2d584a74a1274b2d20e2d07..40442b98b17351356b835207376303d1c3be2379 100644 (file)
@@ -65,5 +65,7 @@ extern int tap_setup_common(char *str, char *type, char **dev_name,
                            char **mac_out, char **gate_addr);
 extern void register_transport(struct transport *new);
 extern unsigned short eth_protocol(struct sk_buff *skb);
+extern void uml_net_setup_etheraddr(struct net_device *dev, char *str);
+
 
 #endif
index d8ddaf9790d2bd858db7f4e41f45d43d1a49d0d0..048ae37eb5aa1add1d9732085d03489320a1c42b 100644 (file)
@@ -290,15 +290,16 @@ extern void halt_skas(void);
 extern void reboot_skas(void);
 
 /* irq.c */
-extern int os_waiting_for_events(struct irq_fd *active_fds);
-extern int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds);
-extern void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
-               struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2);
-extern void os_free_irq_later(struct irq_fd *active_fds,
-               int irq, void *dev_id);
-extern int os_get_pollfd(int i);
-extern void os_set_pollfd(int i, int fd);
+extern int os_waiting_for_events_epoll(void);
+extern void *os_epoll_get_data_pointer(int index);
+extern int os_epoll_triggered(int index, int events);
+extern int os_event_mask(int irq_type);
+extern int os_setup_epoll(void);
+extern int os_add_epoll_fd(int events, int fd, void *data);
+extern int os_mod_epoll_fd(int events, int fd, void *data);
+extern int os_del_epoll_fd(int fd);
 extern void os_set_ioignore(void);
+extern void os_close_epoll_fd(void);
 
 /* sigio.c */
 extern int add_sigio_fd(int fd);
index 23cb9350d47eb5271bf6d0fa2a227bfeb34ca119..6b7f3827d6e4add1993315c220bf96217bfb8986 100644 (file)
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2017 - Cambridge Greys Ltd
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
 #include <as-layout.h>
 #include <kern_util.h>
 #include <os.h>
+#include <irq_user.h>
 
-/*
- * This list is accessed under irq_lock, except in sigio_handler,
- * where it is safe from being modified.  IRQ handlers won't change it -
- * if an IRQ source has vanished, it will be freed by free_irqs just
- * before returning from sigio_handler.  That will process a separate
- * list of irqs to free, with its own locking, coming back here to
- * remove list elements, taking the irq_lock to do so.
+
+/* When epoll triggers we do not know why it did so
+ * we can also have different IRQs for read and write.
+ * This is why we keep a small irq_fd array for each fd -
+ * one entry per IRQ type
  */
-static struct irq_fd *active_fds = NULL;
-static struct irq_fd **last_irq_ptr = &active_fds;
 
-extern void free_irqs(void);
+struct irq_entry {
+       struct irq_entry *next;
+       int fd;
+       struct irq_fd *irq_array[MAX_IRQ_TYPE + 1];
+};
+
+static struct irq_entry *active_fds;
+
+static DEFINE_SPINLOCK(irq_lock);
+
+static void irq_io_loop(struct irq_fd *irq, struct uml_pt_regs *regs)
+{
+/*
+ * irq->active guards against reentry
+ * irq->pending accumulates pending requests
+ * if pending is raised the irq_handler is re-run
+ * until pending is cleared
+ */
+       if (irq->active) {
+               irq->active = false;
+               do {
+                       irq->pending = false;
+                       do_IRQ(irq->irq, regs);
+               } while (irq->pending && (!irq->purge));
+               if (!irq->purge)
+                       irq->active = true;
+       } else {
+               irq->pending = true;
+       }
+}
 
 void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
-       struct irq_fd *irq_fd;
-       int n;
+       struct irq_entry *irq_entry;
+       struct irq_fd *irq;
+
+       int n, i, j;
 
        while (1) {
-               n = os_waiting_for_events(active_fds);
+               /* This is now lockless - epoll keeps back-references to the
+                * irqs which have triggered it so there is no need to walk
+                * the irq list and lock it every time. We avoid locking by
+                * turning off IO for a specific fd by executing
+                * os_del_epoll_fd(fd) before we do any changes to the actual
+                * data structures */
+               n = os_waiting_for_events_epoll();
+
                if (n <= 0) {
                        if (n == -EINTR)
                                continue;
-                       else break;
+                       else
+                               break;
                }
 
-               for (irq_fd = active_fds; irq_fd != NULL;
-                    irq_fd = irq_fd->next) {
-                       if (irq_fd->current_events != 0) {
-                               irq_fd->current_events = 0;
-                               do_IRQ(irq_fd->irq, regs);
+               for (i = 0; i < n ; i++) {
+                       /* Epoll back reference is the entry with 3 irq_fd
+                        * leaves - one for each irq type.
+                        */
+                       irq_entry = (struct irq_entry *)
+                               os_epoll_get_data_pointer(i);
+                       for (j = 0; j < MAX_IRQ_TYPE ; j++) {
+                               irq = irq_entry->irq_array[j];
+                               if (irq == NULL)
+                                       continue;
+                               if (os_epoll_triggered(i, irq->events) > 0)
+                                       irq_io_loop(irq, regs);
+                               if (irq->purge) {
+                                       irq_entry->irq_array[j] = NULL;
+                                       kfree(irq);
+                               }
                        }
                }
        }
+}
+
+static int assign_epoll_events_to_irq(struct irq_entry *irq_entry)
+{
+       int i;
+       int events = 0;
+       struct irq_fd *irq;
 
-       free_irqs();
+       for (i = 0; i < MAX_IRQ_TYPE ; i++) {
+               irq = irq_entry->irq_array[i];
+               if (irq != NULL)
+                       events = irq->events | events;
+       }
+       if (events > 0) {
+       /* os_add_epoll will call os_mod_epoll if this already exists */
+               return os_add_epoll_fd(events, irq_entry->fd, irq_entry);
+       }
+       /* No events - delete */
+       return os_del_epoll_fd(irq_entry->fd);
 }
 
-static DEFINE_SPINLOCK(irq_lock);
+
 
 static int activate_fd(int irq, int fd, int type, void *dev_id)
 {
-       struct pollfd *tmp_pfd;
-       struct irq_fd *new_fd, *irq_fd;
+       struct irq_fd *new_fd;
+       struct irq_entry *irq_entry;
+       int i, err, events;
        unsigned long flags;
-       int events, err, n;
 
        err = os_set_fd_async(fd);
        if (err < 0)
                goto out;
 
-       err = -ENOMEM;
-       new_fd = kmalloc(sizeof(struct irq_fd), GFP_KERNEL);
-       if (new_fd == NULL)
-               goto out;
+       spin_lock_irqsave(&irq_lock, flags);
 
-       if (type == IRQ_READ)
-               events = UM_POLLIN | UM_POLLPRI;
-       else events = UM_POLLOUT;
-       *new_fd = ((struct irq_fd) { .next              = NULL,
-                                    .id                = dev_id,
-                                    .fd                = fd,
-                                    .type              = type,
-                                    .irq               = irq,
-                                    .events            = events,
-                                    .current_events    = 0 } );
+       /* Check if we have an entry for this fd */
 
        err = -EBUSY;
-       spin_lock_irqsave(&irq_lock, flags);
-       for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) {
-               if ((irq_fd->fd == fd) && (irq_fd->type == type)) {
-                       printk(KERN_ERR "Registering fd %d twice\n", fd);
-                       printk(KERN_ERR "Irqs : %d, %d\n", irq_fd->irq, irq);
-                       printk(KERN_ERR "Ids : 0x%p, 0x%p\n", irq_fd->id,
-                              dev_id);
+       for (irq_entry = active_fds;
+               irq_entry != NULL; irq_entry = irq_entry->next) {
+               if (irq_entry->fd == fd)
+                       break;
+       }
+
+       if (irq_entry == NULL) {
+               /* This needs to be atomic as it may be called from an
+                * IRQ context.
+                */
+               irq_entry = kmalloc(sizeof(struct irq_entry), GFP_ATOMIC);
+               if (irq_entry == NULL) {
+                       printk(KERN_ERR
+                               "Failed to allocate new IRQ entry\n");
                        goto out_unlock;
                }
+               irq_entry->fd = fd;
+               for (i = 0; i < MAX_IRQ_TYPE; i++)
+                       irq_entry->irq_array[i] = NULL;
+               irq_entry->next = active_fds;
+               active_fds = irq_entry;
        }
 
-       if (type == IRQ_WRITE)
-               fd = -1;
-
-       tmp_pfd = NULL;
-       n = 0;
+       /* Check if we are trying to re-register an interrupt for a
+        * particular fd
+        */
 
-       while (1) {
-               n = os_create_pollfd(fd, events, tmp_pfd, n);
-               if (n == 0)
-                       break;
+       if (irq_entry->irq_array[type] != NULL) {
+               printk(KERN_ERR
+                       "Trying to reregister IRQ %d FD %d TYPE %d ID %p\n",
+                       irq, fd, type, dev_id
+               );
+               goto out_unlock;
+       } else {
+               /* New entry for this fd */
+
+               err = -ENOMEM;
+               new_fd = kmalloc(sizeof(struct irq_fd), GFP_ATOMIC);
+               if (new_fd == NULL)
+                       goto out_unlock;
 
-               /*
-                * n > 0
-                * It means we couldn't put new pollfd to current pollfds
-                * and tmp_fds is NULL or too small for new pollfds array.
-                * Needed size is equal to n as minimum.
-                *
-                * Here we have to drop the lock in order to call
-                * kmalloc, which might sleep.
-                * If something else came in and changed the pollfds array
-                * so we will not be able to put new pollfd struct to pollfds
-                * then we free the buffer tmp_fds and try again.
+               events = os_event_mask(type);
+
+               *new_fd = ((struct irq_fd) {
+                       .id             = dev_id,
+                       .irq            = irq,
+                       .type           = type,
+                       .events         = events,
+                       .active         = true,
+                       .pending        = false,
+                       .purge          = false
+               });
+               /* Turn off any IO on this fd - allows us to
+                * avoid locking the IRQ loop
                 */
-               spin_unlock_irqrestore(&irq_lock, flags);
-               kfree(tmp_pfd);
-
-               tmp_pfd = kmalloc(n, GFP_KERNEL);
-               if (tmp_pfd == NULL)
-                       goto out_kfree;
-
-               spin_lock_irqsave(&irq_lock, flags);
+               os_del_epoll_fd(irq_entry->fd);
+               irq_entry->irq_array[type] = new_fd;
        }
 
-       *last_irq_ptr = new_fd;
-       last_irq_ptr = &new_fd->next;
-
+       /* Turn back IO on with the correct (new) IO event mask */
+       assign_epoll_events_to_irq(irq_entry);
        spin_unlock_irqrestore(&irq_lock, flags);
-
-       /*
-        * This calls activate_fd, so it has to be outside the critical
-        * section.
-        */
-       maybe_sigio_broken(fd, (type == IRQ_READ));
+       maybe_sigio_broken(fd, (type != IRQ_NONE));
 
        return 0;
-
- out_unlock:
+out_unlock:
        spin_unlock_irqrestore(&irq_lock, flags);
- out_kfree:
-       kfree(new_fd);
- out:
+out:
        return err;
 }
 
-static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg)
+/*
+ * Walk the IRQ list and dispose of any unused entries.
+ * Should be done under irq_lock.
+ */
+
+static void garbage_collect_irq_entries(void)
 {
-       unsigned long flags;
+       int i;
+       bool reap;
+       struct irq_entry *walk;
+       struct irq_entry *previous = NULL;
+       struct irq_entry *to_free;
 
-       spin_lock_irqsave(&irq_lock, flags);
-       os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr);
-       spin_unlock_irqrestore(&irq_lock, flags);
+       if (active_fds == NULL)
+               return;
+       walk = active_fds;
+       while (walk != NULL) {
+               reap = true;
+               for (i = 0; i < MAX_IRQ_TYPE ; i++) {
+                       if (walk->irq_array[i] != NULL) {
+                               reap = false;
+                               break;
+                       }
+               }
+               if (reap) {
+                       if (previous == NULL)
+                               active_fds = walk->next;
+                       else
+                               previous->next = walk->next;
+                       to_free = walk;
+               } else {
+                       to_free = NULL;
+               }
+               walk = walk->next;
+               if (to_free != NULL)
+                       kfree(to_free);
+       }
 }
 
-struct irq_and_dev {
-       int irq;
-       void *dev;
-};
+/*
+ * Walk the IRQ list and get the descriptor for our FD
+ */
 
-static int same_irq_and_dev(struct irq_fd *irq, void *d)
+static struct irq_entry *get_irq_entry_by_fd(int fd)
 {
-       struct irq_and_dev *data = d;
+       struct irq_entry *walk = active_fds;
 
-       return ((irq->irq == data->irq) && (irq->id == data->dev));
+       while (walk != NULL) {
+               if (walk->fd == fd)
+                       return walk;
+               walk = walk->next;
+       }
+       return NULL;
 }
 
-static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
-{
-       struct irq_and_dev data = ((struct irq_and_dev) { .irq  = irq,
-                                                         .dev  = dev });
 
-       free_irq_by_cb(same_irq_and_dev, &data);
-}
+/*
+ * Walk the IRQ list and dispose of an entry for a specific
+ * device, fd and number. Note - if sharing an IRQ for read
+ * and writefor the same FD it will be disposed in either case.
+ * If this behaviour is undesirable use different IRQ ids.
+ */
 
-static int same_fd(struct irq_fd *irq, void *fd)
-{
-       return (irq->fd == *((int *)fd));
-}
+#define IGNORE_IRQ 1
+#define IGNORE_DEV (1<<1)
 
-void free_irq_by_fd(int fd)
+static void do_free_by_irq_and_dev(
+       struct irq_entry *irq_entry,
+       unsigned int irq,
+       void *dev,
+       int flags
+)
 {
-       free_irq_by_cb(same_fd, &fd);
+       int i;
+       struct irq_fd *to_free;
+
+       for (i = 0; i < MAX_IRQ_TYPE ; i++) {
+               if (irq_entry->irq_array[i] != NULL) {
+                       if (
+                       ((flags & IGNORE_IRQ) ||
+                               (irq_entry->irq_array[i]->irq == irq)) &&
+                       ((flags & IGNORE_DEV) ||
+                               (irq_entry->irq_array[i]->id == dev))
+                       ) {
+                               /* Turn off any IO on this fd - allows us to
+                                * avoid locking the IRQ loop
+                                */
+                               os_del_epoll_fd(irq_entry->fd);
+                               to_free = irq_entry->irq_array[i];
+                               irq_entry->irq_array[i] = NULL;
+                               assign_epoll_events_to_irq(irq_entry);
+                               if (to_free->active)
+                                       to_free->purge = true;
+                               else
+                                       kfree(to_free);
+                       }
+               }
+       }
 }
 
-/* Must be called with irq_lock held */
-static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
+void free_irq_by_fd(int fd)
 {
-       struct irq_fd *irq;
-       int i = 0;
-       int fdi;
+       struct irq_entry *to_free;
+       unsigned long flags;
 
-       for (irq = active_fds; irq != NULL; irq = irq->next) {
-               if ((irq->fd == fd) && (irq->irq == irqnum))
-                       break;
-               i++;
-       }
-       if (irq == NULL) {
-               printk(KERN_ERR "find_irq_by_fd doesn't have descriptor %d\n",
-                      fd);
-               goto out;
-       }
-       fdi = os_get_pollfd(i);
-       if ((fdi != -1) && (fdi != fd)) {
-               printk(KERN_ERR "find_irq_by_fd - mismatch between active_fds "
-                      "and pollfds, fd %d vs %d, need %d\n", irq->fd,
-                      fdi, fd);
-               irq = NULL;
-               goto out;
+       spin_lock_irqsave(&irq_lock, flags);
+       to_free = get_irq_entry_by_fd(fd);
+       if (to_free != NULL) {
+               do_free_by_irq_and_dev(
+                       to_free,
+                       -1,
+                       NULL,
+                       IGNORE_IRQ | IGNORE_DEV
+               );
        }
-       *index_out = i;
- out:
-       return irq;
+       garbage_collect_irq_entries();
+       spin_unlock_irqrestore(&irq_lock, flags);
 }
+EXPORT_SYMBOL(free_irq_by_fd);
 
-void reactivate_fd(int fd, int irqnum)
+static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
 {
-       struct irq_fd *irq;
+       struct irq_entry *to_free;
        unsigned long flags;
-       int i;
 
        spin_lock_irqsave(&irq_lock, flags);
-       irq = find_irq_by_fd(fd, irqnum, &i);
-       if (irq == NULL) {
-               spin_unlock_irqrestore(&irq_lock, flags);
-               return;
+       to_free = active_fds;
+       while (to_free != NULL) {
+               do_free_by_irq_and_dev(
+                       to_free,
+                       irq,
+                       dev,
+                       0
+               );
+               to_free = to_free->next;
        }
-       os_set_pollfd(i, irq->fd);
+       garbage_collect_irq_entries();
        spin_unlock_irqrestore(&irq_lock, flags);
+}
 
-       add_sigio_fd(fd);
+
+void reactivate_fd(int fd, int irqnum)
+{
+       /** NOP - we do auto-EOI now **/
 }
 
 void deactivate_fd(int fd, int irqnum)
 {
-       struct irq_fd *irq;
+       struct irq_entry *to_free;
        unsigned long flags;
-       int i;
 
+       os_del_epoll_fd(fd);
        spin_lock_irqsave(&irq_lock, flags);
-       irq = find_irq_by_fd(fd, irqnum, &i);
-       if (irq == NULL) {
-               spin_unlock_irqrestore(&irq_lock, flags);
-               return;
+       to_free = get_irq_entry_by_fd(fd);
+       if (to_free != NULL) {
+               do_free_by_irq_and_dev(
+                       to_free,
+                       irqnum,
+                       NULL,
+                       IGNORE_DEV
+               );
        }
-
-       os_set_pollfd(i, -1);
+       garbage_collect_irq_entries();
        spin_unlock_irqrestore(&irq_lock, flags);
-
        ignore_sigio_fd(fd);
 }
 EXPORT_SYMBOL(deactivate_fd);
@@ -265,17 +386,28 @@ EXPORT_SYMBOL(deactivate_fd);
  */
 int deactivate_all_fds(void)
 {
-       struct irq_fd *irq;
-       int err;
+       unsigned long flags;
+       struct irq_entry *to_free;
 
-       for (irq = active_fds; irq != NULL; irq = irq->next) {
-               err = os_clear_fd_async(irq->fd);
-               if (err)
-                       return err;
-       }
-       /* If there is a signal already queued, after unblocking ignore it */
+       spin_lock_irqsave(&irq_lock, flags);
+       /* Stop IO. The IRQ loop has no lock so this is our
+        * only way of making sure we are safe to dispose
+        * of all IRQ handlers
+        */
        os_set_ioignore();
-
+       to_free = active_fds;
+       while (to_free != NULL) {
+               do_free_by_irq_and_dev(
+                       to_free,
+                       -1,
+                       NULL,
+                       IGNORE_IRQ | IGNORE_DEV
+               );
+               to_free = to_free->next;
+       }
+       garbage_collect_irq_entries();
+       spin_unlock_irqrestore(&irq_lock, flags);
+       os_close_epoll_fd();
        return 0;
 }
 
@@ -353,8 +485,11 @@ void __init init_IRQ(void)
 
        irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq);
 
+
        for (i = 1; i < NR_IRQS; i++)
                irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
+       /* Initialize EPOLL Loop */
+       os_setup_epoll();
 }
 
 /*
index 7f69d17de3540ca8491270408946a00654493cd2..052de4c8acb2ec42c04d4fe30b267aa607e8a332 100644 (file)
@@ -121,12 +121,12 @@ static void __init um_timer_setup(void)
        clockevents_register_device(&timer_clockevent);
 }
 
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
        long long nsecs = os_persistent_clock_emulation();
 
-       set_normalized_timespec(ts, nsecs / NSEC_PER_SEC,
-                               nsecs % NSEC_PER_SEC);
+       set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC,
+                                 nsecs % NSEC_PER_SEC);
 }
 
 void __init time_init(void)
index 2db18cbbb0eaba83aba45bd809ba4ed0834e80b1..c0197097c86e5075c146ad17142895e70b9fe25c 100644 (file)
@@ -12,6 +12,7 @@
 #include <sys/mount.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
+#include <sys/sysmacros.h>
 #include <sys/un.h>
 #include <sys/types.h>
 #include <os.h>
index b9afb74b79ad05d1d1f3e967057e3152283c42eb..365823010346a7567c63cde69009488d5d61d54d 100644 (file)
 /*
+ * Copyright (C) 2017 - Cambridge Greys Ltd
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #include <stdlib.h>
 #include <errno.h>
-#include <poll.h>
+#include <sys/epoll.h>
 #include <signal.h>
 #include <string.h>
 #include <irq_user.h>
 #include <os.h>
 #include <um_malloc.h>
 
+/* Epoll support */
+
+static int epollfd = -1;
+
+#define MAX_EPOLL_EVENTS 64
+
+static struct epoll_event epoll_events[MAX_EPOLL_EVENTS];
+
+/* Helper to return an Epoll data pointer from an epoll event structure.
+ * We need to keep this one on the userspace side to keep includes separate
+ */
+
+void *os_epoll_get_data_pointer(int index)
+{
+       return epoll_events[index].data.ptr;
+}
+
+/* Helper to compare events versus the events in the epoll structure.
+ * Same as above - needs to be on the userspace side
+ */
+
+
+int os_epoll_triggered(int index, int events)
+{
+       return epoll_events[index].events & events;
+}
+/* Helper to set the event mask.
+ * The event mask is opaque to the kernel side, because it does not have
+ * access to the right includes/defines for EPOLL constants.
+ */
+
+int os_event_mask(int irq_type)
+{
+       if (irq_type == IRQ_READ)
+               return EPOLLIN | EPOLLPRI;
+       if (irq_type == IRQ_WRITE)
+               return EPOLLOUT;
+       return 0;
+}
+
 /*
- * Locked by irq_lock in arch/um/kernel/irq.c.  Changed by os_create_pollfd
- * and os_free_irq_by_cb, which are called under irq_lock.
+ * Initial Epoll Setup
  */
-static struct pollfd *pollfds = NULL;
-static int pollfds_num = 0;
-static int pollfds_size = 0;
+int os_setup_epoll(void)
+{
+       epollfd = epoll_create(MAX_EPOLL_EVENTS);
+       return epollfd;
+}
 
-int os_waiting_for_events(struct irq_fd *active_fds)
+/*
+ * Helper to run the actual epoll_wait
+ */
+int os_waiting_for_events_epoll(void)
 {
-       struct irq_fd *irq_fd;
-       int i, n, err;
+       int n, err;
 
-       n = poll(pollfds, pollfds_num, 0);
+       n = epoll_wait(epollfd,
+               (struct epoll_event *) &epoll_events, MAX_EPOLL_EVENTS, 0);
        if (n < 0) {
                err = -errno;
                if (errno != EINTR)
-                       printk(UM_KERN_ERR "os_waiting_for_events:"
-                              " poll returned %d, errno = %d\n", n, errno);
+                       printk(
+                               UM_KERN_ERR "os_waiting_for_events:"
+                               " epoll returned %d, error = %s\n", n,
+                               strerror(errno)
+                       );
                return err;
        }
-
-       if (n == 0)
-               return 0;
-
-       irq_fd = active_fds;
-
-       for (i = 0; i < pollfds_num; i++) {
-               if (pollfds[i].revents != 0) {
-                       irq_fd->current_events = pollfds[i].revents;
-                       pollfds[i].fd = -1;
-               }
-               irq_fd = irq_fd->next;
-       }
        return n;
 }
 
-int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds)
-{
-       if (pollfds_num == pollfds_size) {
-               if (size_tmpfds <= pollfds_size * sizeof(pollfds[0])) {
-                       /* return min size needed for new pollfds area */
-                       return (pollfds_size + 1) * sizeof(pollfds[0]);
-               }
-
-               if (pollfds != NULL) {
-                       memcpy(tmp_pfd, pollfds,
-                              sizeof(pollfds[0]) * pollfds_size);
-                       /* remove old pollfds */
-                       kfree(pollfds);
-               }
-               pollfds = tmp_pfd;
-               pollfds_size++;
-       } else
-               kfree(tmp_pfd); /* remove not used tmp_pfd */
-
-       pollfds[pollfds_num] = ((struct pollfd) { .fd           = fd,
-                                                 .events       = events,
-                                                 .revents      = 0 });
-       pollfds_num++;
-
-       return 0;
-}
 
-void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
-               struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2)
+/*
+ * Helper to add a fd to epoll
+ */
+int os_add_epoll_fd(int events, int fd, void *data)
 {
-       struct irq_fd **prev;
-       int i = 0;
-
-       prev = &active_fds;
-       while (*prev != NULL) {
-               if ((*test)(*prev, arg)) {
-                       struct irq_fd *old_fd = *prev;
-                       if ((pollfds[i].fd != -1) &&
-                           (pollfds[i].fd != (*prev)->fd)) {
-                               printk(UM_KERN_ERR "os_free_irq_by_cb - "
-                                      "mismatch between active_fds and "
-                                      "pollfds, fd %d vs %d\n",
-                                      (*prev)->fd, pollfds[i].fd);
-                               goto out;
-                       }
-
-                       pollfds_num--;
-
-                       /*
-                        * This moves the *whole* array after pollfds[i]
-                        * (though it doesn't spot as such)!
-                        */
-                       memmove(&pollfds[i], &pollfds[i + 1],
-                              (pollfds_num - i) * sizeof(pollfds[0]));
-                       if (*last_irq_ptr2 == &old_fd->next)
-                               *last_irq_ptr2 = prev;
-
-                       *prev = (*prev)->next;
-                       if (old_fd->type == IRQ_WRITE)
-                               ignore_sigio_fd(old_fd->fd);
-                       kfree(old_fd);
-                       continue;
-               }
-               prev = &(*prev)->next;
-               i++;
-       }
- out:
-       return;
+       struct epoll_event event;
+       int result;
+
+       event.data.ptr = data;
+       event.events = events | EPOLLET;
+       result = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
+       if ((result) && (errno == EEXIST))
+               result = os_mod_epoll_fd(events, fd, data);
+       if (result)
+               printk("epollctl add err fd %d, %s\n", fd, strerror(errno));
+       return result;
 }
 
-int os_get_pollfd(int i)
+/*
+ * Helper to mod the fd event mask and/or data backreference
+ */
+int os_mod_epoll_fd(int events, int fd, void *data)
 {
-       return pollfds[i].fd;
+       struct epoll_event event;
+       int result;
+
+       event.data.ptr = data;
+       event.events = events;
+       result = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &event);
+       if (result)
+               printk(UM_KERN_ERR
+                       "epollctl mod err fd %d, %s\n", fd, strerror(errno));
+       return result;
 }
 
-void os_set_pollfd(int i, int fd)
+/*
+ * Helper to delete the epoll fd
+ */
+int os_del_epoll_fd(int fd)
 {
-       pollfds[i].fd = fd;
+       struct epoll_event event;
+       int result;
+       /* This is quiet as we use this as IO ON/OFF - so it is often
+        * invoked on a non-existent fd
+        */
+       result = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event);
+       return result;
 }
 
 void os_set_ioignore(void)
 {
        signal(SIGIO, SIG_IGN);
 }
+
+void os_close_epoll_fd(void)
+{
+       /* Needed so we do not leak an fd when rebooting */
+       os_close_file(epollfd);
+}
index a86d7cc2c2d82fa7d0e5c13d1f3188efcd660fba..bf0acb8aad8b20e31a4591ec5f1889d38d68d010 100644 (file)
@@ -16,6 +16,7 @@
 #include <os.h>
 #include <sysdep/mcontext.h>
 #include <um_malloc.h>
+#include <sys/ucontext.h>
 
 void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
        [SIGTRAP]       = relay_signal,
@@ -159,7 +160,7 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
 
 static void hard_handler(int sig, siginfo_t *si, void *p)
 {
-       struct ucontext *uc = p;
+       ucontext_t *uc = p;
        mcontext_t *mc = &uc->uc_mcontext;
        unsigned long pending = 1UL << sig;
 
index a5e08e2d5d6d0ba8a8772abdbd35070b349d8d95..1d9132b66039a2366d122400b733562c9eba68ec 100644 (file)
@@ -170,10 +170,8 @@ extern void flush_cache_page(struct vm_area_struct *vma,
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
 
-#define flush_dcache_mmap_lock(mapping)                        \
-       spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping)              \
-       spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping)                do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)      do { } while (0)
 
 #define flush_icache_user_range(vma, page, addr, len)  \
        flush_dcache_page(page)
index 3bb0a29fd2d7b11739e9a22247f4d2edaa1d9403..66bb9f6525c04ca427ae4d4440246106aa37dd9e 100644 (file)
 #include <asm/sizes.h>
 #include <mach/memory.h>
 
-/*
- * Allow for constants defined here to be used from assembly code
- * by prepending the UL suffix only with actual C code compilation.
- */
-#define UL(x) _AC(x, UL)
-
 /*
  * PAGE_OFFSET - the virtual address of the start of the kernel image
  * TASK_SIZE - the maximum size of a user space task.
index 199e15bd3ec51514f06aceb761a28f0b9dae59e6..ce8b4da07e35c7e3f60b78f318b4a52a000c11ac 100644 (file)
@@ -122,12 +122,14 @@ struct x86_init_pci {
  * @guest_late_init:           guest late init
  * @x2apic_available:          X2APIC detection
  * @init_mem_mapping:          setup early mappings during init_mem_mapping()
+ * @init_after_bootmem:                guest init after boot allocator is finished
  */
 struct x86_hyper_init {
        void (*init_platform)(void);
        void (*guest_late_init)(void);
        bool (*x2apic_available)(void);
        void (*init_mem_mapping)(void);
+       void (*init_after_bootmem)(void);
 };
 
 /**
index ebda84a915102f236752d392b67f55245c4ea89a..3ab867603e81f39ba879a6bdfdefa95af052436f 100644 (file)
@@ -92,6 +92,7 @@ struct x86_init_ops x86_init __initdata = {
                .guest_late_init        = x86_init_noop,
                .x2apic_available       = bool_x86_init_noop,
                .init_mem_mapping       = x86_init_noop,
+               .init_after_bootmem     = x86_init_noop,
        },
 
        .acpi = {
index 396e1f0151ac1973de4339a8946653f95aecb66d..8008db2bddb36aaa071065dc273a8d2a65adc34a 100644 (file)
@@ -778,6 +778,7 @@ void __init mem_init(void)
        free_all_bootmem();
 
        after_bootmem = 1;
+       x86_init.hyper.init_after_bootmem();
 
        mem_init_print_info(NULL);
        printk(KERN_INFO "virtual kernel memory layout:\n"
index dca9abf2b85c3f09ccc3f9c7f0b86606c044502a..66de40e45f5877e234a044ae565ab9bcdf0ae4a0 100644 (file)
@@ -1185,6 +1185,7 @@ void __init mem_init(void)
        /* this will put all memory onto the freelists */
        free_all_bootmem();
        after_bootmem = 1;
+       x86_init.hyper.init_after_bootmem();
 
        /*
         * Must be done after boot memory is put on freelist, because here we
index 155ecbac9e28f10c2f83cdbf48037a2f8f6a44fe..48c59125160029bb05ee88dc6eed491807fd1131 100644 (file)
@@ -90,9 +90,10 @@ unsigned long arch_mmap_rnd(void)
        return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits);
 }
 
-static unsigned long mmap_base(unsigned long rnd, unsigned long task_size)
+static unsigned long mmap_base(unsigned long rnd, unsigned long task_size,
+                              struct rlimit *rlim_stack)
 {
-       unsigned long gap = rlimit(RLIMIT_STACK);
+       unsigned long gap = rlim_stack->rlim_cur;
        unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap;
        unsigned long gap_min, gap_max;
 
@@ -126,16 +127,17 @@ static unsigned long mmap_legacy_base(unsigned long rnd,
  * process VM image, sets up which VM layout function to use:
  */
 static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base,
-               unsigned long random_factor, unsigned long task_size)
+               unsigned long random_factor, unsigned long task_size,
+               struct rlimit *rlim_stack)
 {
        *legacy_base = mmap_legacy_base(random_factor, task_size);
        if (mmap_is_legacy())
                *base = *legacy_base;
        else
-               *base = mmap_base(random_factor, task_size);
+               *base = mmap_base(random_factor, task_size, rlim_stack);
 }
 
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
        if (mmap_is_legacy())
                mm->get_unmapped_area = arch_get_unmapped_area;
@@ -143,7 +145,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
                mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 
        arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
-                       arch_rnd(mmap64_rnd_bits), task_size_64bit(0));
+                       arch_rnd(mmap64_rnd_bits), task_size_64bit(0),
+                       rlim_stack);
 
 #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
        /*
@@ -153,7 +156,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
         * mmap_base, the compat syscall uses mmap_compat_base.
         */
        arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base,
-                       arch_rnd(mmap32_rnd_bits), task_size_32bit());
+                       arch_rnd(mmap32_rnd_bits), task_size_32bit(),
+                       rlim_stack);
 #endif
 }
 
index 1518d2805ae81733eb0629989f4d39ae595502c2..27361cbb7ca9bed47194fe4127e231a8d3c39c0d 100644 (file)
@@ -6,11 +6,12 @@
 #include <sysdep/stub.h>
 #include <sysdep/faultinfo.h>
 #include <sysdep/mcontext.h>
+#include <sys/ucontext.h>
 
 void __attribute__ ((__section__ (".__syscall_stub")))
 stub_segv_handler(int sig, siginfo_t *info, void *p)
 {
-       struct ucontext *uc = p;
+       ucontext_t *uc = p;
 
        GET_FAULTINFO_FROM_MC(*((struct faultinfo *) STUB_DATA),
                              &uc->uc_mcontext);
index 3c2c2530737efc717c9945ee1e2990e5b5bfd4c5..c36d23aa6c3502a004d1357027dca057fca7e3a6 100644 (file)
@@ -1259,10 +1259,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
         */
        __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
 
-       /* Work out if we support NX */
-       get_cpu_cap(&boot_cpu_data);
-       x86_configure_nx();
-
        /* Get mfn list */
        xen_build_dynamic_phys_to_machine();
 
@@ -1272,6 +1268,10 @@ asmlinkage __visible void __init xen_start_kernel(void)
         */
        xen_setup_gdt(0);
 
+       /* Work out if we support NX */
+       get_cpu_cap(&boot_cpu_data);
+       x86_configure_nx();
+
        xen_init_irq_ops();
 
        /* Let's presume PV guests always boot on vCPU with id 0. */
index d20763472920d257943fd96cfb46413321f8bdca..486c0a34d00b2b75a27467aa46910142c108e373 100644 (file)
@@ -116,6 +116,8 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3);      /* actual vcpu cr3 */
 
 static phys_addr_t xen_pt_base, xen_pt_size __initdata;
 
+static DEFINE_STATIC_KEY_FALSE(xen_struct_pages_ready);
+
 /*
  * Just beyond the highest usermode address.  STACK_TOP_MAX has a
  * redzone above it, so round it up to a PGD boundary.
@@ -155,11 +157,18 @@ void make_lowmem_page_readwrite(void *vaddr)
 }
 
 
+/*
+ * During early boot all page table pages are pinned, but we do not have struct
+ * pages, so return true until struct pages are ready.
+ */
 static bool xen_page_pinned(void *ptr)
 {
-       struct page *page = virt_to_page(ptr);
+       if (static_branch_likely(&xen_struct_pages_ready)) {
+               struct page *page = virt_to_page(ptr);
 
-       return PagePinned(page);
+               return PagePinned(page);
+       }
+       return true;
 }
 
 static void xen_extend_mmu_update(const struct mmu_update *update)
@@ -836,11 +845,6 @@ void xen_mm_pin_all(void)
        spin_unlock(&pgd_lock);
 }
 
-/*
- * The init_mm pagetable is really pinned as soon as its created, but
- * that's before we have page structures to store the bits.  So do all
- * the book-keeping now.
- */
 static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
                                  enum pt_level level)
 {
@@ -848,8 +852,18 @@ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
        return 0;
 }
 
-static void __init xen_mark_init_mm_pinned(void)
+/*
+ * The init_mm pagetable is really pinned as soon as its created, but
+ * that's before we have page structures to store the bits.  So do all
+ * the book-keeping now once struct pages for allocated pages are
+ * initialized. This happens only after free_all_bootmem() is called.
+ */
+static void __init xen_after_bootmem(void)
 {
+       static_branch_enable(&xen_struct_pages_ready);
+#ifdef CONFIG_X86_64
+       SetPagePinned(virt_to_page(level3_user_vsyscall));
+#endif
        xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
 }
 
@@ -1623,14 +1637,15 @@ static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot)
 static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
                                    unsigned level)
 {
-       bool pinned = PagePinned(virt_to_page(mm->pgd));
+       bool pinned = xen_page_pinned(mm->pgd);
 
        trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned);
 
        if (pinned) {
                struct page *page = pfn_to_page(pfn);
 
-               SetPagePinned(page);
+               if (static_branch_likely(&xen_struct_pages_ready))
+                       SetPagePinned(page);
 
                if (!PageHighMem(page)) {
                        xen_mc_batch();
@@ -2364,9 +2379,7 @@ static void __init xen_post_allocator_init(void)
 
 #ifdef CONFIG_X86_64
        pv_mmu_ops.write_cr3 = &xen_write_cr3;
-       SetPagePinned(virt_to_page(level3_user_vsyscall));
 #endif
-       xen_mark_init_mm_pinned();
 }
 
 static void xen_leave_lazy_mmu(void)
@@ -2450,6 +2463,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 void __init xen_init_mmu_ops(void)
 {
        x86_init.paging.pagetable_init = xen_pagetable_init;
+       x86_init.hyper.init_after_bootmem = xen_after_bootmem;
 
        pv_mmu_ops = xen_mmu_ops;
 
index c0c756c76afebf08d282fb35c2fc49b641120bb4..2e20ae2fa2d6c3b865f2c745ad9896a752954907 100644 (file)
@@ -425,6 +425,7 @@ static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
         * data back is to call:
         */
        tick_nohz_idle_enter();
+       tick_nohz_idle_stop_tick_protected();
 
        cpuhp_online_idle(CPUHP_AP_ONLINE_IDLE);
 }
index 96f26e026783d8dd04078aeb5ddc8ff9a3f29e06..5077ead5e59cad4b4fc1db004237a746bf6ca960 100644 (file)
@@ -89,7 +89,9 @@ END(hypercall_page)
        ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,
                .ascii "!writable_page_tables|pae_pgdir_above_4gb")
        ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES,
-               .long (1 << XENFEAT_writable_page_tables) | (1 << XENFEAT_dom0))
+               .long (1 << XENFEAT_writable_page_tables) |       \
+                     (1 << XENFEAT_dom0) |                       \
+                     (1 << XENFEAT_linux_rsdp_unrestricted))
        ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "yes")
        ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz "generic")
        ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
index 3e9d01ada81ffbb98fa58164212a947611988aef..58f29a9d895d83e98c12020bf501b9f3d063eaec 100644 (file)
@@ -57,6 +57,7 @@
 #define MAP_NONBLOCK   0x20000         /* do not block on IO */
 #define MAP_STACK      0x40000         /* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB    0x80000         /* create a huge page mapping */
+#define MAP_FIXED_NOREPLACE 0x100000   /* MAP_FIXED which doesn't unmap underlying mapping */
 #ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
 # define MAP_UNINITIALIZED 0x4000000   /* For anonymous mmap, memory could be
                                         * uninitialized */
index c0dabed5122e23ea68952bc3ccd9857911653632..76e8c88c97b4da98204fa225888ea8da4fdd050e 100644 (file)
@@ -500,6 +500,15 @@ config CRYPTO_CRC32_PCLMUL
          which will enable any routine to use the CRC-32-IEEE 802.3 checksum
          and gain better performance as compared with the table implementation.
 
+config CRYPTO_CRC32_MIPS
+       tristate "CRC32c and CRC32 CRC algorithm (MIPS)"
+       depends on MIPS_CRC_SUPPORT
+       select CRYPTO_HASH
+       help
+         CRC32c and CRC32 CRC algorithms implemented using mips crypto
+         instructions, when available.
+
+
 config CRYPTO_CRCT10DIF
        tristate "CRCT10DIF algorithm"
        select CRYPTO_HASH
index c49766b03165ce095d218b09f0e22e7765e05388..7846c0c20cfec1998674e18e0a516a89caea509f 100644 (file)
@@ -158,16 +158,16 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        void *private;
        int err;
 
-       /* If caller uses non-allowed flag, return error. */
-       if ((sa->salg_feat & ~allowed) || (sa->salg_mask & ~allowed))
-               return -EINVAL;
-
        if (sock->state == SS_CONNECTED)
                return -EINVAL;
 
        if (addr_len < sizeof(*sa))
                return -EINVAL;
 
+       /* If caller uses non-allowed flag, return error. */
+       if ((sa->salg_feat & ~allowed) || (sa->salg_mask & ~allowed))
+               return -EINVAL;
+
        sa->salg_type[sizeof(sa->salg_type) - 1] = 0;
        sa->salg_name[sizeof(sa->salg_name) + addr_len - sizeof(*sa) - 1] = 0;
 
index 9e702bc4960f0b3e279574e388027e5528b68034..7a3a541046ed1610d3979c6c83fee3c0a3b63f53 100644 (file)
@@ -34,6 +34,7 @@
 struct iort_its_msi_chip {
        struct list_head        list;
        struct fwnode_handle    *fw_node;
+       phys_addr_t             base_addr;
        u32                     translation_id;
 };
 
@@ -156,14 +157,16 @@ static LIST_HEAD(iort_msi_chip_list);
 static DEFINE_SPINLOCK(iort_msi_chip_lock);
 
 /**
- * iort_register_domain_token() - register domain token and related ITS ID
- * to the list from where we can get it back later on.
+ * iort_register_domain_token() - register domain token along with related
+ * ITS ID and base address to the list from where we can get it back later on.
  * @trans_id: ITS ID.
+ * @base: ITS base address.
  * @fw_node: Domain token.
  *
  * Returns: 0 on success, -ENOMEM if no memory when allocating list element
  */
-int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node)
+int iort_register_domain_token(int trans_id, phys_addr_t base,
+                              struct fwnode_handle *fw_node)
 {
        struct iort_its_msi_chip *its_msi_chip;
 
@@ -173,6 +176,7 @@ int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node)
 
        its_msi_chip->fw_node = fw_node;
        its_msi_chip->translation_id = trans_id;
+       its_msi_chip->base_addr = base;
 
        spin_lock(&iort_msi_chip_lock);
        list_add(&its_msi_chip->list, &iort_msi_chip_list);
@@ -569,6 +573,24 @@ int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id)
        return -ENODEV;
 }
 
+static int __maybe_unused iort_find_its_base(u32 its_id, phys_addr_t *base)
+{
+       struct iort_its_msi_chip *its_msi_chip;
+       int ret = -ENODEV;
+
+       spin_lock(&iort_msi_chip_lock);
+       list_for_each_entry(its_msi_chip, &iort_msi_chip_list, list) {
+               if (its_msi_chip->translation_id == its_id) {
+                       *base = its_msi_chip->base_addr;
+                       ret = 0;
+                       break;
+               }
+       }
+       spin_unlock(&iort_msi_chip_lock);
+
+       return ret;
+}
+
 /**
  * iort_dev_find_its_id() - Find the ITS identifier for a device
  * @dev: The device.
@@ -754,6 +776,24 @@ static inline bool iort_iommu_driver_enabled(u8 type)
 }
 
 #ifdef CONFIG_IOMMU_API
+static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev)
+{
+       struct acpi_iort_node *iommu;
+       struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+
+       iommu = iort_get_iort_node(fwspec->iommu_fwnode);
+
+       if (iommu && (iommu->type == ACPI_IORT_NODE_SMMU_V3)) {
+               struct acpi_iort_smmu_v3 *smmu;
+
+               smmu = (struct acpi_iort_smmu_v3 *)iommu->node_data;
+               if (smmu->model == ACPI_IORT_SMMU_V3_HISILICON_HI161X)
+                       return iommu;
+       }
+
+       return NULL;
+}
+
 static inline const struct iommu_ops *iort_fwspec_iommu_ops(
                                struct iommu_fwspec *fwspec)
 {
@@ -770,6 +810,69 @@ static inline int iort_add_device_replay(const struct iommu_ops *ops,
 
        return err;
 }
+
+/**
+ * iort_iommu_msi_get_resv_regions - Reserved region driver helper
+ * @dev: Device from iommu_get_resv_regions()
+ * @head: Reserved region list from iommu_get_resv_regions()
+ *
+ * Returns: Number of msi reserved regions on success (0 if platform
+ *          doesn't require the reservation or no associated msi regions),
+ *          appropriate error value otherwise. The ITS interrupt translation
+ *          spaces (ITS_base + SZ_64K, SZ_64K) associated with the device
+ *          are the msi reserved regions.
+ */
+int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
+{
+       struct acpi_iort_its_group *its;
+       struct acpi_iort_node *iommu_node, *its_node = NULL;
+       int i, resv = 0;
+
+       iommu_node = iort_get_msi_resv_iommu(dev);
+       if (!iommu_node)
+               return 0;
+
+       /*
+        * Current logic to reserve ITS regions relies on HW topologies
+        * where a given PCI or named component maps its IDs to only one
+        * ITS group; if a PCI or named component can map its IDs to
+        * different ITS groups through IORT mappings this function has
+        * to be reworked to ensure we reserve regions for all ITS groups
+        * a given PCI or named component may map IDs to.
+        */
+
+       for (i = 0; i < dev->iommu_fwspec->num_ids; i++) {
+               its_node = iort_node_map_id(iommu_node,
+                                       dev->iommu_fwspec->ids[i],
+                                       NULL, IORT_MSI_TYPE);
+               if (its_node)
+                       break;
+       }
+
+       if (!its_node)
+               return 0;
+
+       /* Move to ITS specific data */
+       its = (struct acpi_iort_its_group *)its_node->node_data;
+
+       for (i = 0; i < its->its_count; i++) {
+               phys_addr_t base;
+
+               if (!iort_find_its_base(its->identifiers[i], &base)) {
+                       int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
+                       struct iommu_resv_region *region;
+
+                       region = iommu_alloc_resv_region(base + SZ_64K, SZ_64K,
+                                                        prot, IOMMU_RESV_MSI);
+                       if (region) {
+                               list_add_tail(&region->list, head);
+                               resv++;
+                       }
+               }
+       }
+
+       return (resv == its->its_count) ? resv : -ENODEV;
+}
 #else
 static inline const struct iommu_ops *iort_fwspec_iommu_ops(
                                struct iommu_fwspec *fwspec)
@@ -777,6 +880,8 @@ static inline const struct iommu_ops *iort_fwspec_iommu_ops(
 static inline int iort_add_device_replay(const struct iommu_ops *ops,
                                         struct device *dev)
 { return 0; }
+int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
+{ return 0; }
 #endif
 
 static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node,
index 22a112b4f4d84cf120d21a95b2722ccf04c9f7e6..e2235ed3e4beb266f225625093448d6874d083be 100644 (file)
@@ -36,16 +36,6 @@ static bool force_enable_dimms;
 module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
 
-static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT;
-module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds");
-
-/* after three payloads of overflow, it's dead jim */
-static unsigned int scrub_overflow_abort = 3;
-module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_overflow_abort,
-               "Number of times we overflow ARS results before abort");
-
 static bool disable_vendor_specific;
 module_param(disable_vendor_specific, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_vendor_specific,
@@ -60,6 +50,10 @@ module_param(default_dsm_family, int, S_IRUGO);
 MODULE_PARM_DESC(default_dsm_family,
                "Try this DSM type first when identifying NVDIMM family");
 
+static bool no_init_ars;
+module_param(no_init_ars, bool, 0644);
+MODULE_PARM_DESC(no_init_ars, "Skip ARS run at nfit init time");
+
 LIST_HEAD(acpi_descs);
 DEFINE_MUTEX(acpi_desc_lock);
 
@@ -197,7 +191,7 @@ static int xlat_nvdimm_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd
                 * In the _LSI, _LSR, _LSW case the locked status is
                 * communicated via the read/write commands
                 */
-               if (nfit_mem->has_lsi)
+               if (nfit_mem->has_lsr)
                        break;
 
                if (status >> 16 & ND_CONFIG_LOCKED)
@@ -477,14 +471,14 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
                in_buf.buffer.length = call_pkg->nd_size_in;
        }
 
-       dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n",
-                       __func__, dimm_name, cmd, func, in_buf.buffer.length);
+       dev_dbg(dev, "%s cmd: %d: func: %d input length: %d\n",
+               dimm_name, cmd, func, in_buf.buffer.length);
        print_hex_dump_debug("nvdimm in  ", DUMP_PREFIX_OFFSET, 4, 4,
                        in_buf.buffer.pointer,
                        min_t(u32, 256, in_buf.buffer.length), true);
 
        /* call the BIOS, prefer the named methods over _DSM if available */
-       if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE && nfit_mem->has_lsi)
+       if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE && nfit_mem->has_lsr)
                out_obj = acpi_label_info(handle);
        else if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && nfit_mem->has_lsr) {
                struct nd_cmd_get_config_data_hdr *p = buf;
@@ -507,8 +501,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
        }
 
        if (!out_obj) {
-               dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
-                               cmd_name);
+               dev_dbg(dev, "%s _DSM failed cmd: %s\n", dimm_name, cmd_name);
                return -EINVAL;
        }
 
@@ -529,13 +522,13 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
        }
 
        if (out_obj->package.type != ACPI_TYPE_BUFFER) {
-               dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n",
-                               __func__, dimm_name, cmd_name, out_obj->type);
+               dev_dbg(dev, "%s unexpected output object type cmd: %s type: %d\n",
+                               dimm_name, cmd_name, out_obj->type);
                rc = -EINVAL;
                goto out;
        }
 
-       dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__, dimm_name,
+       dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name,
                        cmd_name, out_obj->buffer.length);
        print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4,
                        out_obj->buffer.pointer,
@@ -547,14 +540,14 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
                                out_obj->buffer.length - offset);
 
                if (offset + out_size > out_obj->buffer.length) {
-                       dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n",
-                                       __func__, dimm_name, cmd_name, i);
+                       dev_dbg(dev, "%s output object underflow cmd: %s field: %d\n",
+                                       dimm_name, cmd_name, i);
                        break;
                }
 
                if (in_buf.buffer.length + offset + out_size > buf_len) {
-                       dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n",
-                                       __func__, dimm_name, cmd_name, i);
+                       dev_dbg(dev, "%s output overrun cmd: %s field: %d\n",
+                                       dimm_name, cmd_name, i);
                        rc = -ENXIO;
                        goto out;
                }
@@ -656,7 +649,7 @@ static bool add_spa(struct acpi_nfit_desc *acpi_desc,
        INIT_LIST_HEAD(&nfit_spa->list);
        memcpy(nfit_spa->spa, spa, sizeof(*spa));
        list_add_tail(&nfit_spa->list, &acpi_desc->spas);
-       dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__,
+       dev_dbg(dev, "spa index: %d type: %s\n",
                        spa->range_index,
                        spa_type_name(nfit_spa_type(spa)));
        return true;
@@ -685,8 +678,8 @@ static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
        INIT_LIST_HEAD(&nfit_memdev->list);
        memcpy(nfit_memdev->memdev, memdev, sizeof(*memdev));
        list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
-       dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d flags: %#x\n",
-                       __func__, memdev->device_handle, memdev->range_index,
+       dev_dbg(dev, "memdev handle: %#x spa: %d dcr: %d flags: %#x\n",
+                       memdev->device_handle, memdev->range_index,
                        memdev->region_index, memdev->flags);
        return true;
 }
@@ -754,7 +747,7 @@ static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
        INIT_LIST_HEAD(&nfit_dcr->list);
        memcpy(nfit_dcr->dcr, dcr, sizeof_dcr(dcr));
        list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs);
-       dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__,
+       dev_dbg(dev, "dcr index: %d windows: %d\n",
                        dcr->region_index, dcr->windows);
        return true;
 }
@@ -781,7 +774,7 @@ static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
        INIT_LIST_HEAD(&nfit_bdw->list);
        memcpy(nfit_bdw->bdw, bdw, sizeof(*bdw));
        list_add_tail(&nfit_bdw->list, &acpi_desc->bdws);
-       dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__,
+       dev_dbg(dev, "bdw dcr: %d windows: %d\n",
                        bdw->region_index, bdw->windows);
        return true;
 }
@@ -820,7 +813,7 @@ static bool add_idt(struct acpi_nfit_desc *acpi_desc,
        INIT_LIST_HEAD(&nfit_idt->list);
        memcpy(nfit_idt->idt, idt, sizeof_idt(idt));
        list_add_tail(&nfit_idt->list, &acpi_desc->idts);
-       dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__,
+       dev_dbg(dev, "idt index: %d num_lines: %d\n",
                        idt->interleave_index, idt->line_count);
        return true;
 }
@@ -860,7 +853,7 @@ static bool add_flush(struct acpi_nfit_desc *acpi_desc,
        INIT_LIST_HEAD(&nfit_flush->list);
        memcpy(nfit_flush->flush, flush, sizeof_flush(flush));
        list_add_tail(&nfit_flush->list, &acpi_desc->flushes);
-       dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__,
+       dev_dbg(dev, "nfit_flush handle: %d hint_count: %d\n",
                        flush->device_handle, flush->hint_count);
        return true;
 }
@@ -873,7 +866,7 @@ static bool add_platform_cap(struct acpi_nfit_desc *acpi_desc,
 
        mask = (1 << (pcap->highest_capability + 1)) - 1;
        acpi_desc->platform_cap = pcap->capabilities & mask;
-       dev_dbg(dev, "%s: cap: %#x\n", __func__, acpi_desc->platform_cap);
+       dev_dbg(dev, "cap: %#x\n", acpi_desc->platform_cap);
        return true;
 }
 
@@ -920,7 +913,7 @@ static void *add_table(struct acpi_nfit_desc *acpi_desc,
                        return err;
                break;
        case ACPI_NFIT_TYPE_SMBIOS:
-               dev_dbg(dev, "%s: smbios\n", __func__);
+               dev_dbg(dev, "smbios\n");
                break;
        case ACPI_NFIT_TYPE_CAPABILITIES:
                if (!add_platform_cap(acpi_desc, table))
@@ -1277,8 +1270,11 @@ static ssize_t scrub_show(struct device *dev,
        if (nd_desc) {
                struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
 
+               mutex_lock(&acpi_desc->init_mutex);
                rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
-                               (work_busy(&acpi_desc->work)) ? "+\n" : "\n");
+                               work_busy(&acpi_desc->dwork.work)
+                               && !acpi_desc->cancel ? "+\n" : "\n");
+               mutex_unlock(&acpi_desc->init_mutex);
        }
        device_unlock(dev);
        return rc;
@@ -1648,7 +1644,7 @@ void __acpi_nvdimm_notify(struct device *dev, u32 event)
        struct nfit_mem *nfit_mem;
        struct acpi_nfit_desc *acpi_desc;
 
-       dev_dbg(dev->parent, "%s: %s: event: %d\n", dev_name(dev), __func__,
+       dev_dbg(dev->parent, "%s: event: %d\n", dev_name(dev),
                        event);
 
        if (event != NFIT_NOTIFY_DIMM_HEALTH) {
@@ -1681,12 +1677,23 @@ static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data)
        device_unlock(dev->parent);
 }
 
+static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method)
+{
+       acpi_handle handle;
+       acpi_status status;
+
+       status = acpi_get_handle(adev->handle, method, &handle);
+
+       if (ACPI_SUCCESS(status))
+               return true;
+       return false;
+}
+
 static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
                struct nfit_mem *nfit_mem, u32 device_handle)
 {
        struct acpi_device *adev, *adev_dimm;
        struct device *dev = acpi_desc->dev;
-       union acpi_object *obj;
        unsigned long dsm_mask;
        const guid_t *guid;
        int i;
@@ -1759,25 +1766,15 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
                                        1ULL << i))
                        set_bit(i, &nfit_mem->dsm_mask);
 
-       obj = acpi_label_info(adev_dimm->handle);
-       if (obj) {
-               ACPI_FREE(obj);
-               nfit_mem->has_lsi = 1;
-               dev_dbg(dev, "%s: has _LSI\n", dev_name(&adev_dimm->dev));
-       }
-
-       obj = acpi_label_read(adev_dimm->handle, 0, 0);
-       if (obj) {
-               ACPI_FREE(obj);
-               nfit_mem->has_lsr = 1;
+       if (acpi_nvdimm_has_method(adev_dimm, "_LSI")
+                       && acpi_nvdimm_has_method(adev_dimm, "_LSR")) {
                dev_dbg(dev, "%s: has _LSR\n", dev_name(&adev_dimm->dev));
+               nfit_mem->has_lsr = true;
        }
 
-       obj = acpi_label_write(adev_dimm->handle, 0, 0, NULL);
-       if (obj) {
-               ACPI_FREE(obj);
-               nfit_mem->has_lsw = 1;
+       if (nfit_mem->has_lsr && acpi_nvdimm_has_method(adev_dimm, "_LSW")) {
                dev_dbg(dev, "%s: has _LSW\n", dev_name(&adev_dimm->dev));
+               nfit_mem->has_lsw = true;
        }
 
        return 0;
@@ -1866,10 +1863,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
                        cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK;
                }
 
-               if (nfit_mem->has_lsi)
+               if (nfit_mem->has_lsr) {
                        set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
-               if (nfit_mem->has_lsr)
                        set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
+               }
                if (nfit_mem->has_lsw)
                        set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
 
@@ -2365,7 +2362,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
        nvdimm = nd_blk_region_to_dimm(ndbr);
        nfit_mem = nvdimm_provider_data(nvdimm);
        if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) {
-               dev_dbg(dev, "%s: missing%s%s%s\n", __func__,
+               dev_dbg(dev, "missing%s%s%s\n",
                                nfit_mem ? "" : " nfit_mem",
                                (nfit_mem && nfit_mem->dcr) ? "" : " dcr",
                                (nfit_mem && nfit_mem->bdw) ? "" : " bdw");
@@ -2384,7 +2381,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
        mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address,
                         nfit_mem->spa_bdw->length, nd_blk_memremap_flags(ndbr));
        if (!mmio->addr.base) {
-               dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
+               dev_dbg(dev, "%s failed to map bdw\n",
                                nvdimm_name(nvdimm));
                return -ENOMEM;
        }
@@ -2395,8 +2392,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
        rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw,
                        nfit_mem->memdev_bdw->interleave_ways);
        if (rc) {
-               dev_dbg(dev, "%s: %s failed to init bdw interleave\n",
-                               __func__, nvdimm_name(nvdimm));
+               dev_dbg(dev, "%s failed to init bdw interleave\n",
+                               nvdimm_name(nvdimm));
                return rc;
        }
 
@@ -2407,7 +2404,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
        mmio->addr.base = devm_nvdimm_ioremap(dev, nfit_mem->spa_dcr->address,
                        nfit_mem->spa_dcr->length);
        if (!mmio->addr.base) {
-               dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
+               dev_dbg(dev, "%s failed to map dcr\n",
                                nvdimm_name(nvdimm));
                return -ENOMEM;
        }
@@ -2418,15 +2415,15 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
        rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr,
                        nfit_mem->memdev_dcr->interleave_ways);
        if (rc) {
-               dev_dbg(dev, "%s: %s failed to init dcr interleave\n",
-                               __func__, nvdimm_name(nvdimm));
+               dev_dbg(dev, "%s failed to init dcr interleave\n",
+                               nvdimm_name(nvdimm));
                return rc;
        }
 
        rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk);
        if (rc < 0) {
-               dev_dbg(dev, "%s: %s failed get DIMM flags\n",
-                               __func__, nvdimm_name(nvdimm));
+               dev_dbg(dev, "%s failed get DIMM flags\n",
+                               nvdimm_name(nvdimm));
                return rc;
        }
 
@@ -2476,7 +2473,8 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa
        memset(&ars_start, 0, sizeof(ars_start));
        ars_start.address = spa->address;
        ars_start.length = spa->length;
-       ars_start.flags = acpi_desc->ars_start_flags;
+       if (test_bit(ARS_SHORT, &nfit_spa->ars_state))
+               ars_start.flags = ND_ARS_RETURN_PREV_DATA;
        if (nfit_spa_type(spa) == NFIT_SPA_PM)
                ars_start.type = ND_ARS_PERSISTENT;
        else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
@@ -2518,16 +2516,62 @@ static int ars_get_status(struct acpi_nfit_desc *acpi_desc)
        int rc, cmd_rc;
 
        rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status,
-                       acpi_desc->ars_status_size, &cmd_rc);
+                       acpi_desc->max_ars, &cmd_rc);
        if (rc < 0)
                return rc;
        return cmd_rc;
 }
 
-static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc,
-               struct nd_cmd_ars_status *ars_status)
+static void ars_complete(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_spa *nfit_spa)
+{
+       struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
+       struct acpi_nfit_system_address *spa = nfit_spa->spa;
+       struct nd_region *nd_region = nfit_spa->nd_region;
+       struct device *dev;
+
+       if ((ars_status->address >= spa->address && ars_status->address
+                               < spa->address + spa->length)
+                       || (ars_status->address < spa->address)) {
+               /*
+                * Assume that if a scrub starts at an offset from the
+                * start of nfit_spa that we are in the continuation
+                * case.
+                *
+                * Otherwise, if the scrub covers the spa range, mark
+                * any pending request complete.
+                */
+               if (ars_status->address + ars_status->length
+                               >= spa->address + spa->length)
+                               /* complete */;
+               else
+                       return;
+       } else
+               return;
+
+       if (test_bit(ARS_DONE, &nfit_spa->ars_state))
+               return;
+
+       if (!test_and_clear_bit(ARS_REQ, &nfit_spa->ars_state))
+               return;
+
+       if (nd_region) {
+               dev = nd_region_dev(nd_region);
+               nvdimm_region_notify(nd_region, NVDIMM_REVALIDATE_POISON);
+       } else
+               dev = acpi_desc->dev;
+
+       dev_dbg(dev, "ARS: range %d %s complete\n", spa->range_index,
+                       test_bit(ARS_SHORT, &nfit_spa->ars_state)
+                       ? "short" : "long");
+       clear_bit(ARS_SHORT, &nfit_spa->ars_state);
+       set_bit(ARS_DONE, &nfit_spa->ars_state);
+}
+
+static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc)
 {
        struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus;
+       struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
        int rc;
        u32 i;
 
@@ -2606,7 +2650,7 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
        struct acpi_nfit_system_address *spa = nfit_spa->spa;
        struct nd_blk_region_desc *ndbr_desc;
        struct nfit_mem *nfit_mem;
-       int blk_valid = 0, rc;
+       int rc;
 
        if (!nvdimm) {
                dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
@@ -2626,15 +2670,14 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
                if (!nfit_mem || !nfit_mem->bdw) {
                        dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n",
                                        spa->range_index, nvdimm_name(nvdimm));
-               } else {
-                       mapping->size = nfit_mem->bdw->capacity;
-                       mapping->start = nfit_mem->bdw->start_address;
-                       ndr_desc->num_lanes = nfit_mem->bdw->windows;
-                       blk_valid = 1;
+                       break;
                }
 
+               mapping->size = nfit_mem->bdw->capacity;
+               mapping->start = nfit_mem->bdw->start_address;
+               ndr_desc->num_lanes = nfit_mem->bdw->windows;
                ndr_desc->mapping = mapping;
-               ndr_desc->num_mappings = blk_valid;
+               ndr_desc->num_mappings = 1;
                ndbr_desc = to_blk_region_desc(ndr_desc);
                ndbr_desc->enable = acpi_nfit_blk_region_enable;
                ndbr_desc->do_io = acpi_desc->blk_do_io;
@@ -2682,8 +2725,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
                return 0;
 
        if (spa->range_index == 0 && !nfit_spa_is_virtual(spa)) {
-               dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n",
-                               __func__);
+               dev_dbg(acpi_desc->dev, "detected invalid spa index\n");
                return 0;
        }
 
@@ -2769,301 +2811,243 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
        return rc;
 }
 
-static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc,
-               u32 max_ars)
+static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc)
 {
        struct device *dev = acpi_desc->dev;
        struct nd_cmd_ars_status *ars_status;
 
-       if (acpi_desc->ars_status && acpi_desc->ars_status_size >= max_ars) {
-               memset(acpi_desc->ars_status, 0, acpi_desc->ars_status_size);
+       if (acpi_desc->ars_status) {
+               memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
                return 0;
        }
 
-       if (acpi_desc->ars_status)
-               devm_kfree(dev, acpi_desc->ars_status);
-       acpi_desc->ars_status = NULL;
-       ars_status = devm_kzalloc(dev, max_ars, GFP_KERNEL);
+       ars_status = devm_kzalloc(dev, acpi_desc->max_ars, GFP_KERNEL);
        if (!ars_status)
                return -ENOMEM;
        acpi_desc->ars_status = ars_status;
-       acpi_desc->ars_status_size = max_ars;
        return 0;
 }
 
-static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_spa *nfit_spa)
+static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc)
 {
-       struct acpi_nfit_system_address *spa = nfit_spa->spa;
        int rc;
 
-       if (!nfit_spa->max_ars) {
-               struct nd_cmd_ars_cap ars_cap;
-
-               memset(&ars_cap, 0, sizeof(ars_cap));
-               rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
-               if (rc < 0)
-                       return rc;
-               nfit_spa->max_ars = ars_cap.max_ars_out;
-               nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
-               /* check that the supported scrub types match the spa type */
-               if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE &&
-                               ((ars_cap.status >> 16) & ND_ARS_VOLATILE) == 0)
-                       return -ENOTTY;
-               else if (nfit_spa_type(spa) == NFIT_SPA_PM &&
-                               ((ars_cap.status >> 16) & ND_ARS_PERSISTENT) == 0)
-                       return -ENOTTY;
-       }
-
-       if (ars_status_alloc(acpi_desc, nfit_spa->max_ars))
+       if (ars_status_alloc(acpi_desc))
                return -ENOMEM;
 
        rc = ars_get_status(acpi_desc);
+
        if (rc < 0 && rc != -ENOSPC)
                return rc;
 
-       if (ars_status_process_records(acpi_desc, acpi_desc->ars_status))
+       if (ars_status_process_records(acpi_desc))
                return -ENOMEM;
 
        return 0;
 }
 
-static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
-               struct nfit_spa *nfit_spa)
+static int ars_register(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa,
+               int *query_rc)
 {
-       struct acpi_nfit_system_address *spa = nfit_spa->spa;
-       unsigned int overflow_retry = scrub_overflow_abort;
-       u64 init_ars_start = 0, init_ars_len = 0;
-       struct device *dev = acpi_desc->dev;
-       unsigned int tmo = scrub_timeout;
-       int rc;
+       int rc = *query_rc;
 
-       if (!nfit_spa->ars_required || !nfit_spa->nd_region)
-               return;
+       if (no_init_ars)
+               return acpi_nfit_register_region(acpi_desc, nfit_spa);
 
-       rc = ars_start(acpi_desc, nfit_spa);
-       /*
-        * If we timed out the initial scan we'll still be busy here,
-        * and will wait another timeout before giving up permanently.
-        */
-       if (rc < 0 && rc != -EBUSY)
-               return;
-
-       do {
-               u64 ars_start, ars_len;
-
-               if (acpi_desc->cancel)
-                       break;
-               rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
-               if (rc == -ENOTTY)
-                       break;
-               if (rc == -EBUSY && !tmo) {
-                       dev_warn(dev, "range %d ars timeout, aborting\n",
-                                       spa->range_index);
-                       break;
-               }
+       set_bit(ARS_REQ, &nfit_spa->ars_state);
+       set_bit(ARS_SHORT, &nfit_spa->ars_state);
 
+       switch (rc) {
+       case 0:
+       case -EAGAIN:
+               rc = ars_start(acpi_desc, nfit_spa);
                if (rc == -EBUSY) {
-                       /*
-                        * Note, entries may be appended to the list
-                        * while the lock is dropped, but the workqueue
-                        * being active prevents entries being deleted /
-                        * freed.
-                        */
-                       mutex_unlock(&acpi_desc->init_mutex);
-                       ssleep(1);
-                       tmo--;
-                       mutex_lock(&acpi_desc->init_mutex);
-                       continue;
-               }
-
-               /* we got some results, but there are more pending... */
-               if (rc == -ENOSPC && overflow_retry--) {
-                       if (!init_ars_len) {
-                               init_ars_len = acpi_desc->ars_status->length;
-                               init_ars_start = acpi_desc->ars_status->address;
-                       }
-                       rc = ars_continue(acpi_desc);
-               }
-
-               if (rc < 0) {
-                       dev_warn(dev, "range %d ars continuation failed\n",
-                                       spa->range_index);
+                       *query_rc = rc;
                        break;
-               }
-
-               if (init_ars_len) {
-                       ars_start = init_ars_start;
-                       ars_len = init_ars_len;
+               } else if (rc == 0) {
+                       rc = acpi_nfit_query_poison(acpi_desc);
                } else {
-                       ars_start = acpi_desc->ars_status->address;
-                       ars_len = acpi_desc->ars_status->length;
+                       set_bit(ARS_FAILED, &nfit_spa->ars_state);
+                       break;
                }
-               dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n",
-                               spa->range_index, ars_start, ars_len);
-               /* notify the region about new poison entries */
-               nvdimm_region_notify(nfit_spa->nd_region,
-                               NVDIMM_REVALIDATE_POISON);
+               if (rc == -EAGAIN)
+                       clear_bit(ARS_SHORT, &nfit_spa->ars_state);
+               else if (rc == 0)
+                       ars_complete(acpi_desc, nfit_spa);
                break;
-       } while (1);
+       case -EBUSY:
+       case -ENOSPC:
+               break;
+       default:
+               set_bit(ARS_FAILED, &nfit_spa->ars_state);
+               break;
+       }
+
+       if (test_and_clear_bit(ARS_DONE, &nfit_spa->ars_state))
+               set_bit(ARS_REQ, &nfit_spa->ars_state);
+
+       return acpi_nfit_register_region(acpi_desc, nfit_spa);
 }
 
-static void acpi_nfit_scrub(struct work_struct *work)
+static void ars_complete_all(struct acpi_nfit_desc *acpi_desc)
 {
-       struct device *dev;
-       u64 init_scrub_length = 0;
        struct nfit_spa *nfit_spa;
-       u64 init_scrub_address = 0;
-       bool init_ars_done = false;
-       struct acpi_nfit_desc *acpi_desc;
-       unsigned int tmo = scrub_timeout;
-       unsigned int overflow_retry = scrub_overflow_abort;
-
-       acpi_desc = container_of(work, typeof(*acpi_desc), work);
-       dev = acpi_desc->dev;
-
-       /*
-        * We scrub in 2 phases.  The first phase waits for any platform
-        * firmware initiated scrubs to complete and then we go search for the
-        * affected spa regions to mark them scanned.  In the second phase we
-        * initiate a directed scrub for every range that was not scrubbed in
-        * phase 1. If we're called for a 'rescan', we harmlessly pass through
-        * the first phase, but really only care about running phase 2, where
-        * regions can be notified of new poison.
-        */
 
-       /* process platform firmware initiated scrubs */
- retry:
-       mutex_lock(&acpi_desc->init_mutex);
        list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-               struct nd_cmd_ars_status *ars_status;
-               struct acpi_nfit_system_address *spa;
-               u64 ars_start, ars_len;
-               int rc;
-
-               if (acpi_desc->cancel)
-                       break;
-
-               if (nfit_spa->nd_region)
+               if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
                        continue;
+               ars_complete(acpi_desc, nfit_spa);
+       }
+}
 
-               if (init_ars_done) {
-                       /*
-                        * No need to re-query, we're now just
-                        * reconciling all the ranges covered by the
-                        * initial scrub
-                        */
-                       rc = 0;
-               } else
-                       rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
-
-               if (rc == -ENOTTY) {
-                       /* no ars capability, just register spa and move on */
-                       acpi_nfit_register_region(acpi_desc, nfit_spa);
-                       continue;
-               }
-
-               if (rc == -EBUSY && !tmo) {
-                       /* fallthrough to directed scrub in phase 2 */
-                       dev_warn(dev, "timeout awaiting ars results, continuing...\n");
-                       break;
-               } else if (rc == -EBUSY) {
-                       mutex_unlock(&acpi_desc->init_mutex);
-                       ssleep(1);
-                       tmo--;
-                       goto retry;
-               }
-
-               /* we got some results, but there are more pending... */
-               if (rc == -ENOSPC && overflow_retry--) {
-                       ars_status = acpi_desc->ars_status;
-                       /*
-                        * Record the original scrub range, so that we
-                        * can recall all the ranges impacted by the
-                        * initial scrub.
-                        */
-                       if (!init_scrub_length) {
-                               init_scrub_length = ars_status->length;
-                               init_scrub_address = ars_status->address;
-                       }
-                       rc = ars_continue(acpi_desc);
-                       if (rc == 0) {
-                               mutex_unlock(&acpi_desc->init_mutex);
-                               goto retry;
-                       }
-               }
+static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc,
+               int query_rc)
+{
+       unsigned int tmo = acpi_desc->scrub_tmo;
+       struct device *dev = acpi_desc->dev;
+       struct nfit_spa *nfit_spa;
 
-               if (rc < 0) {
-                       /*
-                        * Initial scrub failed, we'll give it one more
-                        * try below...
-                        */
-                       break;
-               }
+       if (acpi_desc->cancel)
+               return 0;
 
-               /* We got some final results, record completed ranges */
-               ars_status = acpi_desc->ars_status;
-               if (init_scrub_length) {
-                       ars_start = init_scrub_address;
-                       ars_len = ars_start + init_scrub_length;
-               } else {
-                       ars_start = ars_status->address;
-                       ars_len = ars_status->length;
-               }
-               spa = nfit_spa->spa;
+       if (query_rc == -EBUSY) {
+               dev_dbg(dev, "ARS: ARS busy\n");
+               return min(30U * 60U, tmo * 2);
+       }
+       if (query_rc == -ENOSPC) {
+               dev_dbg(dev, "ARS: ARS continue\n");
+               ars_continue(acpi_desc);
+               return 1;
+       }
+       if (query_rc && query_rc != -EAGAIN) {
+               unsigned long long addr, end;
 
-               if (!init_ars_done) {
-                       init_ars_done = true;
-                       dev_dbg(dev, "init scrub %#llx + %#llx complete\n",
-                                       ars_start, ars_len);
-               }
-               if (ars_start <= spa->address && ars_start + ars_len
-                               >= spa->address + spa->length)
-                       acpi_nfit_register_region(acpi_desc, nfit_spa);
+               addr = acpi_desc->ars_status->address;
+               end = addr + acpi_desc->ars_status->length;
+               dev_dbg(dev, "ARS: %llx-%llx failed (%d)\n", addr, end,
+                               query_rc);
        }
 
-       /*
-        * For all the ranges not covered by an initial scrub we still
-        * want to see if there are errors, but it's ok to discover them
-        * asynchronously.
-        */
+       ars_complete_all(acpi_desc);
        list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-               /*
-                * Flag all the ranges that still need scrubbing, but
-                * register them now to make data available.
-                */
-               if (!nfit_spa->nd_region) {
-                       nfit_spa->ars_required = 1;
-                       acpi_nfit_register_region(acpi_desc, nfit_spa);
+               if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
+                       continue;
+               if (test_bit(ARS_REQ, &nfit_spa->ars_state)) {
+                       int rc = ars_start(acpi_desc, nfit_spa);
+
+                       clear_bit(ARS_DONE, &nfit_spa->ars_state);
+                       dev = nd_region_dev(nfit_spa->nd_region);
+                       dev_dbg(dev, "ARS: range %d ARS start (%d)\n",
+                                       nfit_spa->spa->range_index, rc);
+                       if (rc == 0 || rc == -EBUSY)
+                               return 1;
+                       dev_err(dev, "ARS: range %d ARS failed (%d)\n",
+                                       nfit_spa->spa->range_index, rc);
+                       set_bit(ARS_FAILED, &nfit_spa->ars_state);
                }
        }
-       acpi_desc->init_complete = 1;
+       return 0;
+}
 
-       list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
-               acpi_nfit_async_scrub(acpi_desc, nfit_spa);
-       acpi_desc->scrub_count++;
-       acpi_desc->ars_start_flags = 0;
-       if (acpi_desc->scrub_count_state)
-               sysfs_notify_dirent(acpi_desc->scrub_count_state);
+static void acpi_nfit_scrub(struct work_struct *work)
+{
+       struct acpi_nfit_desc *acpi_desc;
+       unsigned int tmo;
+       int query_rc;
+
+       acpi_desc = container_of(work, typeof(*acpi_desc), dwork.work);
+       mutex_lock(&acpi_desc->init_mutex);
+       query_rc = acpi_nfit_query_poison(acpi_desc);
+       tmo = __acpi_nfit_scrub(acpi_desc, query_rc);
+       if (tmo) {
+               queue_delayed_work(nfit_wq, &acpi_desc->dwork, tmo * HZ);
+               acpi_desc->scrub_tmo = tmo;
+       } else {
+               acpi_desc->scrub_count++;
+               if (acpi_desc->scrub_count_state)
+                       sysfs_notify_dirent(acpi_desc->scrub_count_state);
+       }
+       memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
        mutex_unlock(&acpi_desc->init_mutex);
 }
 
+static void acpi_nfit_init_ars(struct acpi_nfit_desc *acpi_desc,
+               struct nfit_spa *nfit_spa)
+{
+       int type = nfit_spa_type(nfit_spa->spa);
+       struct nd_cmd_ars_cap ars_cap;
+       int rc;
+
+       memset(&ars_cap, 0, sizeof(ars_cap));
+       rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
+       if (rc < 0)
+               return;
+       /* check that the supported scrub types match the spa type */
+       if (type == NFIT_SPA_VOLATILE && ((ars_cap.status >> 16)
+                               & ND_ARS_VOLATILE) == 0)
+               return;
+       if (type == NFIT_SPA_PM && ((ars_cap.status >> 16)
+                               & ND_ARS_PERSISTENT) == 0)
+               return;
+
+       nfit_spa->max_ars = ars_cap.max_ars_out;
+       nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
+       acpi_desc->max_ars = max(nfit_spa->max_ars, acpi_desc->max_ars);
+       clear_bit(ARS_FAILED, &nfit_spa->ars_state);
+       set_bit(ARS_REQ, &nfit_spa->ars_state);
+}
+
 static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
 {
        struct nfit_spa *nfit_spa;
-       int rc;
+       int rc, query_rc;
+
+       list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+               set_bit(ARS_FAILED, &nfit_spa->ars_state);
+               switch (nfit_spa_type(nfit_spa->spa)) {
+               case NFIT_SPA_VOLATILE:
+               case NFIT_SPA_PM:
+                       acpi_nfit_init_ars(acpi_desc, nfit_spa);
+                       break;
+               }
+       }
+
+       /*
+        * Reap any results that might be pending before starting new
+        * short requests.
+        */
+       query_rc = acpi_nfit_query_poison(acpi_desc);
+       if (query_rc == 0)
+               ars_complete_all(acpi_desc);
 
        list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
-               if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) {
-                       /* BLK regions don't need to wait for ars results */
+               switch (nfit_spa_type(nfit_spa->spa)) {
+               case NFIT_SPA_VOLATILE:
+               case NFIT_SPA_PM:
+                       /* register regions and kick off initial ARS run */
+                       rc = ars_register(acpi_desc, nfit_spa, &query_rc);
+                       if (rc)
+                               return rc;
+                       break;
+               case NFIT_SPA_BDW:
+                       /* nothing to register */
+                       break;
+               case NFIT_SPA_DCR:
+               case NFIT_SPA_VDISK:
+               case NFIT_SPA_VCD:
+               case NFIT_SPA_PDISK:
+               case NFIT_SPA_PCD:
+                       /* register known regions that don't support ARS */
                        rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
                        if (rc)
                                return rc;
+                       break;
+               default:
+                       /* don't register unknown regions */
+                       break;
                }
 
-       acpi_desc->ars_start_flags = 0;
-       if (!acpi_desc->cancel)
-               queue_work(nfit_wq, &acpi_desc->work);
+       queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
        return 0;
 }
 
@@ -3173,8 +3157,7 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
                data = add_table(acpi_desc, &prev, data, end);
 
        if (IS_ERR(data)) {
-               dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__,
-                               PTR_ERR(data));
+               dev_dbg(dev, "nfit table parsing error: %ld\n", PTR_ERR(data));
                rc = PTR_ERR(data);
                goto out_unlock;
        }
@@ -3199,49 +3182,20 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
 }
 EXPORT_SYMBOL_GPL(acpi_nfit_init);
 
-struct acpi_nfit_flush_work {
-       struct work_struct work;
-       struct completion cmp;
-};
-
-static void flush_probe(struct work_struct *work)
-{
-       struct acpi_nfit_flush_work *flush;
-
-       flush = container_of(work, typeof(*flush), work);
-       complete(&flush->cmp);
-}
-
 static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
 {
        struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
        struct device *dev = acpi_desc->dev;
-       struct acpi_nfit_flush_work flush;
-       int rc;
 
-       /* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
+       /* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
        device_lock(dev);
        device_unlock(dev);
 
-       /* bounce the init_mutex to make init_complete valid */
+       /* Bounce the init_mutex to complete initial registration */
        mutex_lock(&acpi_desc->init_mutex);
-       if (acpi_desc->cancel || acpi_desc->init_complete) {
-               mutex_unlock(&acpi_desc->init_mutex);
-               return 0;
-       }
-
-       /*
-        * Scrub work could take 10s of seconds, userspace may give up so we
-        * need to be interruptible while waiting.
-        */
-       INIT_WORK_ONSTACK(&flush.work, flush_probe);
-       init_completion(&flush.cmp);
-       queue_work(nfit_wq, &flush.work);
        mutex_unlock(&acpi_desc->init_mutex);
 
-       rc = wait_for_completion_interruptible(&flush.cmp);
-       cancel_work_sync(&flush.work);
-       return rc;
+       return 0;
 }
 
 static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
@@ -3260,20 +3214,18 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
         * just needs guarantees that any ars it initiates are not
         * interrupted by any intervening start reqeusts from userspace.
         */
-       if (work_busy(&acpi_desc->work))
+       if (work_busy(&acpi_desc->dwork.work))
                return -EBUSY;
 
        return 0;
 }
 
-int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags)
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
 {
        struct device *dev = acpi_desc->dev;
+       int scheduled = 0, busy = 0;
        struct nfit_spa *nfit_spa;
 
-       if (work_busy(&acpi_desc->work))
-               return -EBUSY;
-
        mutex_lock(&acpi_desc->init_mutex);
        if (acpi_desc->cancel) {
                mutex_unlock(&acpi_desc->init_mutex);
@@ -3281,19 +3233,32 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags)
        }
 
        list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-               struct acpi_nfit_system_address *spa = nfit_spa->spa;
+               int type = nfit_spa_type(nfit_spa->spa);
 
-               if (nfit_spa_type(spa) != NFIT_SPA_PM)
+               if (type != NFIT_SPA_PM && type != NFIT_SPA_VOLATILE)
+                       continue;
+               if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
                        continue;
 
-               nfit_spa->ars_required = 1;
+               if (test_and_set_bit(ARS_REQ, &nfit_spa->ars_state))
+                       busy++;
+               else {
+                       if (test_bit(ARS_SHORT, &flags))
+                               set_bit(ARS_SHORT, &nfit_spa->ars_state);
+                       scheduled++;
+               }
+       }
+       if (scheduled) {
+               queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
+               dev_dbg(dev, "ars_scan triggered\n");
        }
-       acpi_desc->ars_start_flags = flags;
-       queue_work(nfit_wq, &acpi_desc->work);
-       dev_dbg(dev, "%s: ars_scan triggered\n", __func__);
        mutex_unlock(&acpi_desc->init_mutex);
 
-       return 0;
+       if (scheduled)
+               return 0;
+       if (busy)
+               return -EBUSY;
+       return -ENOTTY;
 }
 
 void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
@@ -3320,7 +3285,8 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
        INIT_LIST_HEAD(&acpi_desc->dimms);
        INIT_LIST_HEAD(&acpi_desc->list);
        mutex_init(&acpi_desc->init_mutex);
-       INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
+       acpi_desc->scrub_tmo = 1;
+       INIT_DELAYED_WORK(&acpi_desc->dwork, acpi_nfit_scrub);
 }
 EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
 
@@ -3344,6 +3310,7 @@ void acpi_nfit_shutdown(void *data)
 
        mutex_lock(&acpi_desc->init_mutex);
        acpi_desc->cancel = 1;
+       cancel_delayed_work_sync(&acpi_desc->dwork);
        mutex_unlock(&acpi_desc->init_mutex);
 
        /*
@@ -3397,8 +3364,8 @@ static int acpi_nfit_add(struct acpi_device *adev)
                        rc = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
                                        obj->buffer.length);
                else
-                       dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n",
-                                __func__, (int) obj->type);
+                       dev_dbg(dev, "invalid type %d, ignoring _FIT\n",
+                               (int) obj->type);
                kfree(buf.pointer);
        } else
                /* skip over the lead-in header table */
@@ -3427,7 +3394,7 @@ static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle)
 
        if (!dev->driver) {
                /* dev->driver may be null if we're being removed */
-               dev_dbg(dev, "%s: no driver found for dev\n", __func__);
+               dev_dbg(dev, "no driver found for dev\n");
                return;
        }
 
@@ -3465,15 +3432,15 @@ static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle)
 static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle)
 {
        struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
-       u8 flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ?
-                       0 : ND_ARS_RETURN_PREV_DATA;
+       unsigned long flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ?
+                       0 : 1 << ARS_SHORT;
 
        acpi_nfit_ars_rescan(acpi_desc, flags);
 }
 
 void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
 {
-       dev_dbg(dev, "%s: event: 0x%x\n", __func__, event);
+       dev_dbg(dev, "event: 0x%x\n", event);
 
        switch (event) {
        case NFIT_NOTIFY_UPDATE:
index b92921439657a17d0960e3a827eb898e8bfe0061..e9626bf6ca2960a2398aeeefc0f4c9e814e60c1b 100644 (file)
@@ -51,9 +51,8 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
                        if ((spa->address + spa->length - 1) < mce->addr)
                                continue;
                        found_match = 1;
-                       dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n",
-                               __func__, spa->range_index, spa->address,
-                               spa->length);
+                       dev_dbg(dev, "addr in SPA %d (0x%llx, 0x%llx)\n",
+                               spa->range_index, spa->address, spa->length);
                        /*
                         * We can break at the first match because we're going
                         * to rescan all the SPA ranges. There shouldn't be any
index 50d36e166d7011dbc0bcb29ccbd9166c128fc995..7d15856a739f9dc70cbb4e325d95395829cb6b63 100644 (file)
@@ -117,10 +117,17 @@ enum nfit_dimm_notifiers {
        NFIT_NOTIFY_DIMM_HEALTH = 0x81,
 };
 
+enum nfit_ars_state {
+       ARS_REQ,
+       ARS_DONE,
+       ARS_SHORT,
+       ARS_FAILED,
+};
+
 struct nfit_spa {
        struct list_head list;
        struct nd_region *nd_region;
-       unsigned int ars_required:1;
+       unsigned long ars_state;
        u32 clear_err_unit;
        u32 max_ars;
        struct acpi_nfit_system_address spa[0];
@@ -171,9 +178,8 @@ struct nfit_mem {
        struct resource *flush_wpq;
        unsigned long dsm_mask;
        int family;
-       u32 has_lsi:1;
-       u32 has_lsr:1;
-       u32 has_lsw:1;
+       bool has_lsr;
+       bool has_lsw;
 };
 
 struct acpi_nfit_desc {
@@ -191,18 +197,18 @@ struct acpi_nfit_desc {
        struct device *dev;
        u8 ars_start_flags;
        struct nd_cmd_ars_status *ars_status;
-       size_t ars_status_size;
-       struct work_struct work;
+       struct delayed_work dwork;
        struct list_head list;
        struct kernfs_node *scrub_count_state;
+       unsigned int max_ars;
        unsigned int scrub_count;
        unsigned int scrub_mode;
        unsigned int cancel:1;
-       unsigned int init_complete:1;
        unsigned long dimm_cmd_force_en;
        unsigned long bus_cmd_force_en;
        unsigned long bus_nfit_cmd_force_en;
        unsigned int platform_cap;
+       unsigned int scrub_tmo;
        int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
                        void *iobuf, u64 len, int rw);
 };
@@ -244,7 +250,7 @@ struct nfit_blk {
 
 extern struct list_head acpi_descs;
 extern struct mutex acpi_desc_lock;
-int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags);
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags);
 
 #ifdef CONFIG_X86_MCE
 void nfit_mce_register(void);
index c7cf48ad5cb9dafc42826a26fedd229733cb87ef..a651ab3490d8bbbe843204c6065bb30c64f7dfb8 100644 (file)
@@ -533,7 +533,7 @@ int acpi_processor_notify_smm(struct module *calling_module)
 
 EXPORT_SYMBOL(acpi_processor_notify_smm);
 
-static int acpi_processor_get_psd(struct acpi_processor        *pr)
+int acpi_processor_get_psd(acpi_handle handle, struct acpi_psd_package *pdomain)
 {
        int result = 0;
        acpi_status status = AE_OK;
@@ -541,9 +541,8 @@ static int acpi_processor_get_psd(struct acpi_processor     *pr)
        struct acpi_buffer format = {sizeof("NNNNN"), "NNNNN"};
        struct acpi_buffer state = {0, NULL};
        union acpi_object  *psd = NULL;
-       struct acpi_psd_package *pdomain;
 
-       status = acpi_evaluate_object(pr->handle, "_PSD", NULL, &buffer);
+       status = acpi_evaluate_object(handle, "_PSD", NULL, &buffer);
        if (ACPI_FAILURE(status)) {
                return -ENODEV;
        }
@@ -561,8 +560,6 @@ static int acpi_processor_get_psd(struct acpi_processor     *pr)
                goto end;
        }
 
-       pdomain = &(pr->performance->domain_info);
-
        state.length = sizeof(struct acpi_psd_package);
        state.pointer = pdomain;
 
@@ -597,6 +594,7 @@ end:
        kfree(buffer.pointer);
        return result;
 }
+EXPORT_SYMBOL(acpi_processor_get_psd);
 
 int acpi_processor_preregister_performance(
                struct acpi_processor_performance __percpu *performance)
@@ -645,7 +643,8 @@ int acpi_processor_preregister_performance(
 
                pr->performance = per_cpu_ptr(performance, i);
                cpumask_set_cpu(i, pr->performance->shared_cpu_map);
-               if (acpi_processor_get_psd(pr)) {
+               pdomain = &(pr->performance->domain_info);
+               if (acpi_processor_get_psd(pr->handle, pdomain)) {
                        retval = -EINVAL;
                        continue;
                }
index 79fcd2bae96b5246ccfa2fbff0dd4ebf3f277ad9..bffe8616bd55437e728404a90305b95f1a503dfc 100644 (file)
@@ -837,11 +837,8 @@ int __init memory_dev_init(void)
         * during boot and have been initialized
         */
        mutex_lock(&mem_sysfs_mutex);
-       for (i = 0; i < NR_MEM_SECTIONS; i += sections_per_block) {
-               /* Don't iterate over sections we know are !present: */
-               if (i > __highest_present_section_nr)
-                       break;
-
+       for (i = 0; i <= __highest_present_section_nr;
+               i += sections_per_block) {
                err = add_memory_block(i);
                if (!ret)
                        ret = err;
index 1e03b04819c8652afba4332adaf7d7120f80755d..07dc5419bd63bec43af2de5da0a485ec99bab7cd 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/ceph/osd_client.h>
 #include <linux/ceph/mon_client.h>
 #include <linux/ceph/cls_lock_client.h>
+#include <linux/ceph/striper.h>
 #include <linux/ceph/decode.h>
 #include <linux/parser.h>
 #include <linux/bsearch.h>
@@ -200,95 +201,81 @@ struct rbd_client {
 };
 
 struct rbd_img_request;
-typedef void (*rbd_img_callback_t)(struct rbd_img_request *);
-
-#define        BAD_WHICH       U32_MAX         /* Good which or bad which, which? */
-
-struct rbd_obj_request;
-typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *);
 
 enum obj_request_type {
-       OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES
+       OBJ_REQUEST_NODATA = 1,
+       OBJ_REQUEST_BIO,        /* pointer into provided bio (list) */
+       OBJ_REQUEST_BVECS,      /* pointer into provided bio_vec array */
+       OBJ_REQUEST_OWN_BVECS,  /* private bio_vec array, doesn't own pages */
 };
 
 enum obj_operation_type {
+       OBJ_OP_READ = 1,
        OBJ_OP_WRITE,
-       OBJ_OP_READ,
        OBJ_OP_DISCARD,
 };
 
-enum obj_req_flags {
-       OBJ_REQ_DONE,           /* completion flag: not done = 0, done = 1 */
-       OBJ_REQ_IMG_DATA,       /* object usage: standalone = 0, image = 1 */
-       OBJ_REQ_KNOWN,          /* EXISTS flag valid: no = 0, yes = 1 */
-       OBJ_REQ_EXISTS,         /* target exists: no = 0, yes = 1 */
+/*
+ * Writes go through the following state machine to deal with
+ * layering:
+ *
+ *                       need copyup
+ * RBD_OBJ_WRITE_GUARD ---------------> RBD_OBJ_WRITE_COPYUP
+ *        |     ^                              |
+ *        v     \------------------------------/
+ *      done
+ *        ^
+ *        |
+ * RBD_OBJ_WRITE_FLAT
+ *
+ * Writes start in RBD_OBJ_WRITE_GUARD or _FLAT, depending on whether
+ * there is a parent or not.
+ */
+enum rbd_obj_write_state {
+       RBD_OBJ_WRITE_FLAT = 1,
+       RBD_OBJ_WRITE_GUARD,
+       RBD_OBJ_WRITE_COPYUP,
 };
 
 struct rbd_obj_request {
-       u64                     object_no;
-       u64                     offset;         /* object start byte */
-       u64                     length;         /* bytes from offset */
-       unsigned long           flags;
-
-       /*
-        * An object request associated with an image will have its
-        * img_data flag set; a standalone object request will not.
-        *
-        * A standalone object request will have which == BAD_WHICH
-        * and a null obj_request pointer.
-        *
-        * An object request initiated in support of a layered image
-        * object (to check for its existence before a write) will
-        * have which == BAD_WHICH and a non-null obj_request pointer.
-        *
-        * Finally, an object request for rbd image data will have
-        * which != BAD_WHICH, and will have a non-null img_request
-        * pointer.  The value of which will be in the range
-        * 0..(img_request->obj_request_count-1).
-        */
+       struct ceph_object_extent ex;
        union {
-               struct rbd_obj_request  *obj_request;   /* STAT op */
-               struct {
-                       struct rbd_img_request  *img_request;
-                       u64                     img_offset;
-                       /* links for img_request->obj_requests list */
-                       struct list_head        links;
-               };
+               bool                    tried_parent;   /* for reads */
+               enum rbd_obj_write_state write_state;   /* for writes */
        };
-       u32                     which;          /* posn image request list */
 
-       enum obj_request_type   type;
+       struct rbd_img_request  *img_request;
+       struct ceph_file_extent *img_extents;
+       u32                     num_img_extents;
+
        union {
-               struct bio      *bio_list;
+               struct ceph_bio_iter    bio_pos;
                struct {
-                       struct page     **pages;
-                       u32             page_count;
+                       struct ceph_bvec_iter   bvec_pos;
+                       u32                     bvec_count;
+                       u32                     bvec_idx;
                };
        };
-       struct page             **copyup_pages;
-       u32                     copyup_page_count;
+       struct bio_vec          *copyup_bvecs;
+       u32                     copyup_bvec_count;
 
        struct ceph_osd_request *osd_req;
 
        u64                     xferred;        /* bytes transferred */
        int                     result;
 
-       rbd_obj_callback_t      callback;
-
        struct kref             kref;
 };
 
 enum img_req_flags {
-       IMG_REQ_WRITE,          /* I/O direction: read = 0, write = 1 */
        IMG_REQ_CHILD,          /* initiator: block = 0, child image = 1 */
        IMG_REQ_LAYERED,        /* ENOENT handling: normal = 0, layered = 1 */
-       IMG_REQ_DISCARD,        /* discard: normal = 0, discard request = 1 */
 };
 
 struct rbd_img_request {
        struct rbd_device       *rbd_dev;
-       u64                     offset; /* starting image byte offset */
-       u64                     length; /* byte count from offset */
+       enum obj_operation_type op_type;
+       enum obj_request_type   data_type;
        unsigned long           flags;
        union {
                u64                     snap_id;        /* for reads */
@@ -298,26 +285,21 @@ struct rbd_img_request {
                struct request          *rq;            /* block request */
                struct rbd_obj_request  *obj_request;   /* obj req initiator */
        };
-       struct page             **copyup_pages;
-       u32                     copyup_page_count;
-       spinlock_t              completion_lock;/* protects next_completion */
-       u32                     next_completion;
-       rbd_img_callback_t      callback;
+       spinlock_t              completion_lock;
        u64                     xferred;/* aggregate bytes transferred */
        int                     result; /* first nonzero obj_request result */
 
+       struct list_head        object_extents; /* obj_req.ex structs */
        u32                     obj_request_count;
-       struct list_head        obj_requests;   /* rbd_obj_request structs */
+       u32                     pending_count;
 
        struct kref             kref;
 };
 
 #define for_each_obj_request(ireq, oreq) \
-       list_for_each_entry(oreq, &(ireq)->obj_requests, links)
-#define for_each_obj_request_from(ireq, oreq) \
-       list_for_each_entry_from(oreq, &(ireq)->obj_requests, links)
+       list_for_each_entry(oreq, &(ireq)->object_extents, ex.oe_item)
 #define for_each_obj_request_safe(ireq, oreq, n) \
-       list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links)
+       list_for_each_entry_safe(oreq, n, &(ireq)->object_extents, ex.oe_item)
 
 enum rbd_watch_state {
        RBD_WATCH_STATE_UNREGISTERED,
@@ -433,8 +415,6 @@ static DEFINE_SPINLOCK(rbd_client_list_lock);
 static struct kmem_cache       *rbd_img_request_cache;
 static struct kmem_cache       *rbd_obj_request_cache;
 
-static struct bio_set          *rbd_bio_clone;
-
 static int rbd_major;
 static DEFINE_IDA(rbd_dev_id_ida);
 
@@ -447,8 +427,6 @@ static bool single_major = true;
 module_param(single_major, bool, S_IRUGO);
 MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: true)");
 
-static int rbd_img_request_submit(struct rbd_img_request *img_request);
-
 static ssize_t rbd_add(struct bus_type *bus, const char *buf,
                       size_t count);
 static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
@@ -458,7 +436,6 @@ static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf,
 static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf,
                                       size_t count);
 static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth);
-static void rbd_spec_put(struct rbd_spec *spec);
 
 static int rbd_dev_id_to_minor(int dev_id)
 {
@@ -577,9 +554,6 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...)
 #  define rbd_assert(expr)     ((void) 0)
 #endif /* !RBD_DEBUG */
 
-static void rbd_osd_copyup_callback(struct rbd_obj_request *obj_request);
-static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request);
-static void rbd_img_parent_read(struct rbd_obj_request *obj_request);
 static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
 
 static int rbd_dev_refresh(struct rbd_device *rbd_dev);
@@ -856,26 +830,6 @@ static char* obj_op_name(enum obj_operation_type op_type)
        }
 }
 
-/*
- * Get a ceph client with specific addr and configuration, if one does
- * not exist create it.  Either way, ceph_opts is consumed by this
- * function.
- */
-static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
-{
-       struct rbd_client *rbdc;
-
-       mutex_lock_nested(&client_mutex, SINGLE_DEPTH_NESTING);
-       rbdc = rbd_client_find(ceph_opts);
-       if (rbdc)       /* using an existing client */
-               ceph_destroy_options(ceph_opts);
-       else
-               rbdc = rbd_client_create(ceph_opts);
-       mutex_unlock(&client_mutex);
-
-       return rbdc;
-}
-
 /*
  * Destroy ceph client
  *
@@ -904,6 +858,56 @@ static void rbd_put_client(struct rbd_client *rbdc)
                kref_put(&rbdc->kref, rbd_client_release);
 }
 
+static int wait_for_latest_osdmap(struct ceph_client *client)
+{
+       u64 newest_epoch;
+       int ret;
+
+       ret = ceph_monc_get_version(&client->monc, "osdmap", &newest_epoch);
+       if (ret)
+               return ret;
+
+       if (client->osdc.osdmap->epoch >= newest_epoch)
+               return 0;
+
+       ceph_osdc_maybe_request_map(&client->osdc);
+       return ceph_monc_wait_osdmap(&client->monc, newest_epoch,
+                                    client->options->mount_timeout);
+}
+
+/*
+ * Get a ceph client with specific addr and configuration, if one does
+ * not exist create it.  Either way, ceph_opts is consumed by this
+ * function.
+ */
+static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
+{
+       struct rbd_client *rbdc;
+       int ret;
+
+       mutex_lock_nested(&client_mutex, SINGLE_DEPTH_NESTING);
+       rbdc = rbd_client_find(ceph_opts);
+       if (rbdc) {
+               ceph_destroy_options(ceph_opts);
+
+               /*
+                * Using an existing client.  Make sure ->pg_pools is up to
+                * date before we look up the pool id in do_rbd_add().
+                */
+               ret = wait_for_latest_osdmap(rbdc->client);
+               if (ret) {
+                       rbd_warn(NULL, "failed to get latest osdmap: %d", ret);
+                       rbd_put_client(rbdc);
+                       rbdc = ERR_PTR(ret);
+               }
+       } else {
+               rbdc = rbd_client_create(ceph_opts);
+       }
+       mutex_unlock(&client_mutex);
+
+       return rbdc;
+}
+
 static bool rbd_image_format_valid(u32 image_format)
 {
        return image_format == 1 || image_format == 2;
@@ -1223,272 +1227,59 @@ static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev)
        rbd_dev->mapping.features = 0;
 }
 
-static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset)
-{
-       u64 segment_size = rbd_obj_bytes(&rbd_dev->header);
-
-       return offset & (segment_size - 1);
-}
-
-static u64 rbd_segment_length(struct rbd_device *rbd_dev,
-                               u64 offset, u64 length)
-{
-       u64 segment_size = rbd_obj_bytes(&rbd_dev->header);
-
-       offset &= segment_size - 1;
-
-       rbd_assert(length <= U64_MAX - offset);
-       if (offset + length > segment_size)
-               length = segment_size - offset;
-
-       return length;
-}
-
-/*
- * bio helpers
- */
-
-static void bio_chain_put(struct bio *chain)
-{
-       struct bio *tmp;
-
-       while (chain) {
-               tmp = chain;
-               chain = chain->bi_next;
-               bio_put(tmp);
-       }
-}
-
-/*
- * zeros a bio chain, starting at specific offset
- */
-static void zero_bio_chain(struct bio *chain, int start_ofs)
+static void zero_bvec(struct bio_vec *bv)
 {
-       struct bio_vec bv;
-       struct bvec_iter iter;
-       unsigned long flags;
        void *buf;
-       int pos = 0;
-
-       while (chain) {
-               bio_for_each_segment(bv, chain, iter) {
-                       if (pos + bv.bv_len > start_ofs) {
-                               int remainder = max(start_ofs - pos, 0);
-                               buf = bvec_kmap_irq(&bv, &flags);
-                               memset(buf + remainder, 0,
-                                      bv.bv_len - remainder);
-                               flush_dcache_page(bv.bv_page);
-                               bvec_kunmap_irq(buf, &flags);
-                       }
-                       pos += bv.bv_len;
-               }
+       unsigned long flags;
 
-               chain = chain->bi_next;
-       }
+       buf = bvec_kmap_irq(bv, &flags);
+       memset(buf, 0, bv->bv_len);
+       flush_dcache_page(bv->bv_page);
+       bvec_kunmap_irq(buf, &flags);
 }
 
-/*
- * similar to zero_bio_chain(), zeros data defined by a page array,
- * starting at the given byte offset from the start of the array and
- * continuing up to the given end offset.  The pages array is
- * assumed to be big enough to hold all bytes up to the end.
- */
-static void zero_pages(struct page **pages, u64 offset, u64 end)
+static void zero_bios(struct ceph_bio_iter *bio_pos, u32 off, u32 bytes)
 {
-       struct page **page = &pages[offset >> PAGE_SHIFT];
-
-       rbd_assert(end > offset);
-       rbd_assert(end - offset <= (u64)SIZE_MAX);
-       while (offset < end) {
-               size_t page_offset;
-               size_t length;
-               unsigned long flags;
-               void *kaddr;
-
-               page_offset = offset & ~PAGE_MASK;
-               length = min_t(size_t, PAGE_SIZE - page_offset, end - offset);
-               local_irq_save(flags);
-               kaddr = kmap_atomic(*page);
-               memset(kaddr + page_offset, 0, length);
-               flush_dcache_page(*page);
-               kunmap_atomic(kaddr);
-               local_irq_restore(flags);
+       struct ceph_bio_iter it = *bio_pos;
 
-               offset += length;
-               page++;
-       }
+       ceph_bio_iter_advance(&it, off);
+       ceph_bio_iter_advance_step(&it, bytes, ({
+               zero_bvec(&bv);
+       }));
 }
 
-/*
- * Clone a portion of a bio, starting at the given byte offset
- * and continuing for the number of bytes indicated.
- */
-static struct bio *bio_clone_range(struct bio *bio_src,
-                                       unsigned int offset,
-                                       unsigned int len,
-                                       gfp_t gfpmask)
+static void zero_bvecs(struct ceph_bvec_iter *bvec_pos, u32 off, u32 bytes)
 {
-       struct bio *bio;
-
-       bio = bio_clone_fast(bio_src, gfpmask, rbd_bio_clone);
-       if (!bio)
-               return NULL;    /* ENOMEM */
+       struct ceph_bvec_iter it = *bvec_pos;
 
-       bio_advance(bio, offset);
-       bio->bi_iter.bi_size = len;
-
-       return bio;
+       ceph_bvec_iter_advance(&it, off);
+       ceph_bvec_iter_advance_step(&it, bytes, ({
+               zero_bvec(&bv);
+       }));
 }
 
 /*
- * Clone a portion of a bio chain, starting at the given byte offset
- * into the first bio in the source chain and continuing for the
- * number of bytes indicated.  The result is another bio chain of
- * exactly the given length, or a null pointer on error.
- *
- * The bio_src and offset parameters are both in-out.  On entry they
- * refer to the first source bio and the offset into that bio where
- * the start of data to be cloned is located.
+ * Zero a range in @obj_req data buffer defined by a bio (list) or
+ * (private) bio_vec array.
  *
- * On return, bio_src is updated to refer to the bio in the source
- * chain that contains first un-cloned byte, and *offset will
- * contain the offset of that byte within that bio.
- */
-static struct bio *bio_chain_clone_range(struct bio **bio_src,
-                                       unsigned int *offset,
-                                       unsigned int len,
-                                       gfp_t gfpmask)
-{
-       struct bio *bi = *bio_src;
-       unsigned int off = *offset;
-       struct bio *chain = NULL;
-       struct bio **end;
-
-       /* Build up a chain of clone bios up to the limit */
-
-       if (!bi || off >= bi->bi_iter.bi_size || !len)
-               return NULL;            /* Nothing to clone */
-
-       end = &chain;
-       while (len) {
-               unsigned int bi_size;
-               struct bio *bio;
-
-               if (!bi) {
-                       rbd_warn(NULL, "bio_chain exhausted with %u left", len);
-                       goto out_err;   /* EINVAL; ran out of bio's */
-               }
-               bi_size = min_t(unsigned int, bi->bi_iter.bi_size - off, len);
-               bio = bio_clone_range(bi, off, bi_size, gfpmask);
-               if (!bio)
-                       goto out_err;   /* ENOMEM */
-
-               *end = bio;
-               end = &bio->bi_next;
-
-               off += bi_size;
-               if (off == bi->bi_iter.bi_size) {
-                       bi = bi->bi_next;
-                       off = 0;
-               }
-               len -= bi_size;
-       }
-       *bio_src = bi;
-       *offset = off;
-
-       return chain;
-out_err:
-       bio_chain_put(chain);
-
-       return NULL;
-}
-
-/*
- * The default/initial value for all object request flags is 0.  For
- * each flag, once its value is set to 1 it is never reset to 0
- * again.
+ * @off is relative to the start of the data buffer.
  */
-static void obj_request_img_data_set(struct rbd_obj_request *obj_request)
-{
-       if (test_and_set_bit(OBJ_REQ_IMG_DATA, &obj_request->flags)) {
-               struct rbd_device *rbd_dev;
-
-               rbd_dev = obj_request->img_request->rbd_dev;
-               rbd_warn(rbd_dev, "obj_request %p already marked img_data",
-                       obj_request);
-       }
-}
-
-static bool obj_request_img_data_test(struct rbd_obj_request *obj_request)
+static void rbd_obj_zero_range(struct rbd_obj_request *obj_req, u32 off,
+                              u32 bytes)
 {
-       smp_mb();
-       return test_bit(OBJ_REQ_IMG_DATA, &obj_request->flags) != 0;
-}
-
-static void obj_request_done_set(struct rbd_obj_request *obj_request)
-{
-       if (test_and_set_bit(OBJ_REQ_DONE, &obj_request->flags)) {
-               struct rbd_device *rbd_dev = NULL;
-
-               if (obj_request_img_data_test(obj_request))
-                       rbd_dev = obj_request->img_request->rbd_dev;
-               rbd_warn(rbd_dev, "obj_request %p already marked done",
-                       obj_request);
+       switch (obj_req->img_request->data_type) {
+       case OBJ_REQUEST_BIO:
+               zero_bios(&obj_req->bio_pos, off, bytes);
+               break;
+       case OBJ_REQUEST_BVECS:
+       case OBJ_REQUEST_OWN_BVECS:
+               zero_bvecs(&obj_req->bvec_pos, off, bytes);
+               break;
+       default:
+               rbd_assert(0);
        }
 }
 
-static bool obj_request_done_test(struct rbd_obj_request *obj_request)
-{
-       smp_mb();
-       return test_bit(OBJ_REQ_DONE, &obj_request->flags) != 0;
-}
-
-/*
- * This sets the KNOWN flag after (possibly) setting the EXISTS
- * flag.  The latter is set based on the "exists" value provided.
- *
- * Note that for our purposes once an object exists it never goes
- * away again.  It's possible that the response from two existence
- * checks are separated by the creation of the target object, and
- * the first ("doesn't exist") response arrives *after* the second
- * ("does exist").  In that case we ignore the second one.
- */
-static void obj_request_existence_set(struct rbd_obj_request *obj_request,
-                               bool exists)
-{
-       if (exists)
-               set_bit(OBJ_REQ_EXISTS, &obj_request->flags);
-       set_bit(OBJ_REQ_KNOWN, &obj_request->flags);
-       smp_mb();
-}
-
-static bool obj_request_known_test(struct rbd_obj_request *obj_request)
-{
-       smp_mb();
-       return test_bit(OBJ_REQ_KNOWN, &obj_request->flags) != 0;
-}
-
-static bool obj_request_exists_test(struct rbd_obj_request *obj_request)
-{
-       smp_mb();
-       return test_bit(OBJ_REQ_EXISTS, &obj_request->flags) != 0;
-}
-
-static bool obj_request_overlaps_parent(struct rbd_obj_request *obj_request)
-{
-       struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
-
-       return obj_request->img_offset <
-           round_up(rbd_dev->parent_overlap, rbd_obj_bytes(&rbd_dev->header));
-}
-
-static void rbd_obj_request_get(struct rbd_obj_request *obj_request)
-{
-       dout("%s: obj %p (was %d)\n", __func__, obj_request,
-               kref_read(&obj_request->kref));
-       kref_get(&obj_request->kref);
-}
-
 static void rbd_obj_request_destroy(struct kref *kref);
 static void rbd_obj_request_put(struct rbd_obj_request *obj_request)
 {
@@ -1505,18 +1296,13 @@ static void rbd_img_request_get(struct rbd_img_request *img_request)
        kref_get(&img_request->kref);
 }
 
-static bool img_request_child_test(struct rbd_img_request *img_request);
-static void rbd_parent_request_destroy(struct kref *kref);
 static void rbd_img_request_destroy(struct kref *kref);
 static void rbd_img_request_put(struct rbd_img_request *img_request)
 {
        rbd_assert(img_request != NULL);
        dout("%s: img %p (was %d)\n", __func__, img_request,
                kref_read(&img_request->kref));
-       if (img_request_child_test(img_request))
-               kref_put(&img_request->kref, rbd_parent_request_destroy);
-       else
-               kref_put(&img_request->kref, rbd_img_request_destroy);
+       kref_put(&img_request->kref, rbd_img_request_destroy);
 }
 
 static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request,
@@ -1526,139 +1312,37 @@ static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request,
 
        /* Image request now owns object's original reference */
        obj_request->img_request = img_request;
-       obj_request->which = img_request->obj_request_count;
-       rbd_assert(!obj_request_img_data_test(obj_request));
-       obj_request_img_data_set(obj_request);
-       rbd_assert(obj_request->which != BAD_WHICH);
        img_request->obj_request_count++;
-       list_add_tail(&obj_request->links, &img_request->obj_requests);
-       dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request,
-               obj_request->which);
+       img_request->pending_count++;
+       dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
 }
 
 static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request,
                                        struct rbd_obj_request *obj_request)
 {
-       rbd_assert(obj_request->which != BAD_WHICH);
-
-       dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request,
-               obj_request->which);
-       list_del(&obj_request->links);
+       dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
+       list_del(&obj_request->ex.oe_item);
        rbd_assert(img_request->obj_request_count > 0);
        img_request->obj_request_count--;
-       rbd_assert(obj_request->which == img_request->obj_request_count);
-       obj_request->which = BAD_WHICH;
-       rbd_assert(obj_request_img_data_test(obj_request));
        rbd_assert(obj_request->img_request == img_request);
-       obj_request->img_request = NULL;
-       obj_request->callback = NULL;
        rbd_obj_request_put(obj_request);
 }
 
-static bool obj_request_type_valid(enum obj_request_type type)
-{
-       switch (type) {
-       case OBJ_REQUEST_NODATA:
-       case OBJ_REQUEST_BIO:
-       case OBJ_REQUEST_PAGES:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static void rbd_img_obj_callback(struct rbd_obj_request *obj_request);
-
 static void rbd_obj_request_submit(struct rbd_obj_request *obj_request)
 {
        struct ceph_osd_request *osd_req = obj_request->osd_req;
 
        dout("%s %p object_no %016llx %llu~%llu osd_req %p\n", __func__,
-            obj_request, obj_request->object_no, obj_request->offset,
-            obj_request->length, osd_req);
-       if (obj_request_img_data_test(obj_request)) {
-               WARN_ON(obj_request->callback != rbd_img_obj_callback);
-               rbd_img_request_get(obj_request->img_request);
-       }
+            obj_request, obj_request->ex.oe_objno, obj_request->ex.oe_off,
+            obj_request->ex.oe_len, osd_req);
        ceph_osdc_start_request(osd_req->r_osdc, osd_req, false);
 }
 
-static void rbd_img_request_complete(struct rbd_img_request *img_request)
-{
-
-       dout("%s: img %p\n", __func__, img_request);
-
-       /*
-        * If no error occurred, compute the aggregate transfer
-        * count for the image request.  We could instead use
-        * atomic64_cmpxchg() to update it as each object request
-        * completes; not clear which way is better off hand.
-        */
-       if (!img_request->result) {
-               struct rbd_obj_request *obj_request;
-               u64 xferred = 0;
-
-               for_each_obj_request(img_request, obj_request)
-                       xferred += obj_request->xferred;
-               img_request->xferred = xferred;
-       }
-
-       if (img_request->callback)
-               img_request->callback(img_request);
-       else
-               rbd_img_request_put(img_request);
-}
-
 /*
  * The default/initial value for all image request flags is 0.  Each
  * is conditionally set to 1 at image request initialization time
  * and currently never change thereafter.
  */
-static void img_request_write_set(struct rbd_img_request *img_request)
-{
-       set_bit(IMG_REQ_WRITE, &img_request->flags);
-       smp_mb();
-}
-
-static bool img_request_write_test(struct rbd_img_request *img_request)
-{
-       smp_mb();
-       return test_bit(IMG_REQ_WRITE, &img_request->flags) != 0;
-}
-
-/*
- * Set the discard flag when the img_request is an discard request
- */
-static void img_request_discard_set(struct rbd_img_request *img_request)
-{
-       set_bit(IMG_REQ_DISCARD, &img_request->flags);
-       smp_mb();
-}
-
-static bool img_request_discard_test(struct rbd_img_request *img_request)
-{
-       smp_mb();
-       return test_bit(IMG_REQ_DISCARD, &img_request->flags) != 0;
-}
-
-static void img_request_child_set(struct rbd_img_request *img_request)
-{
-       set_bit(IMG_REQ_CHILD, &img_request->flags);
-       smp_mb();
-}
-
-static void img_request_child_clear(struct rbd_img_request *img_request)
-{
-       clear_bit(IMG_REQ_CHILD, &img_request->flags);
-       smp_mb();
-}
-
-static bool img_request_child_test(struct rbd_img_request *img_request)
-{
-       smp_mb();
-       return test_bit(IMG_REQ_CHILD, &img_request->flags) != 0;
-}
-
 static void img_request_layered_set(struct rbd_img_request *img_request)
 {
        set_bit(IMG_REQ_LAYERED, &img_request->flags);
@@ -1677,209 +1361,70 @@ static bool img_request_layered_test(struct rbd_img_request *img_request)
        return test_bit(IMG_REQ_LAYERED, &img_request->flags) != 0;
 }
 
-static enum obj_operation_type
-rbd_img_request_op_type(struct rbd_img_request *img_request)
-{
-       if (img_request_write_test(img_request))
-               return OBJ_OP_WRITE;
-       else if (img_request_discard_test(img_request))
-               return OBJ_OP_DISCARD;
-       else
-               return OBJ_OP_READ;
-}
-
-static void
-rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request)
+static bool rbd_obj_is_entire(struct rbd_obj_request *obj_req)
 {
-       u64 xferred = obj_request->xferred;
-       u64 length = obj_request->length;
+       struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
 
-       dout("%s: obj %p img %p result %d %llu/%llu\n", __func__,
-               obj_request, obj_request->img_request, obj_request->result,
-               xferred, length);
-       /*
-        * ENOENT means a hole in the image.  We zero-fill the entire
-        * length of the request.  A short read also implies zero-fill
-        * to the end of the request.  An error requires the whole
-        * length of the request to be reported finished with an error
-        * to the block layer.  In each case we update the xferred
-        * count to indicate the whole request was satisfied.
-        */
-       rbd_assert(obj_request->type != OBJ_REQUEST_NODATA);
-       if (obj_request->result == -ENOENT) {
-               if (obj_request->type == OBJ_REQUEST_BIO)
-                       zero_bio_chain(obj_request->bio_list, 0);
-               else
-                       zero_pages(obj_request->pages, 0, length);
-               obj_request->result = 0;
-       } else if (xferred < length && !obj_request->result) {
-               if (obj_request->type == OBJ_REQUEST_BIO)
-                       zero_bio_chain(obj_request->bio_list, xferred);
-               else
-                       zero_pages(obj_request->pages, xferred, length);
-       }
-       obj_request->xferred = length;
-       obj_request_done_set(obj_request);
+       return !obj_req->ex.oe_off &&
+              obj_req->ex.oe_len == rbd_dev->layout.object_size;
 }
 
-static void rbd_obj_request_complete(struct rbd_obj_request *obj_request)
+static bool rbd_obj_is_tail(struct rbd_obj_request *obj_req)
 {
-       dout("%s: obj %p cb %p\n", __func__, obj_request,
-               obj_request->callback);
-       obj_request->callback(obj_request);
-}
+       struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
 
-static void rbd_obj_request_error(struct rbd_obj_request *obj_request, int err)
-{
-       obj_request->result = err;
-       obj_request->xferred = 0;
-       /*
-        * kludge - mirror rbd_obj_request_submit() to match a put in
-        * rbd_img_obj_callback()
-        */
-       if (obj_request_img_data_test(obj_request)) {
-               WARN_ON(obj_request->callback != rbd_img_obj_callback);
-               rbd_img_request_get(obj_request->img_request);
-       }
-       obj_request_done_set(obj_request);
-       rbd_obj_request_complete(obj_request);
+       return obj_req->ex.oe_off + obj_req->ex.oe_len ==
+                                       rbd_dev->layout.object_size;
 }
 
-static void rbd_osd_read_callback(struct rbd_obj_request *obj_request)
+static u64 rbd_obj_img_extents_bytes(struct rbd_obj_request *obj_req)
 {
-       struct rbd_img_request *img_request = NULL;
-       struct rbd_device *rbd_dev = NULL;
-       bool layered = false;
-
-       if (obj_request_img_data_test(obj_request)) {
-               img_request = obj_request->img_request;
-               layered = img_request && img_request_layered_test(img_request);
-               rbd_dev = img_request->rbd_dev;
-       }
-
-       dout("%s: obj %p img %p result %d %llu/%llu\n", __func__,
-               obj_request, img_request, obj_request->result,
-               obj_request->xferred, obj_request->length);
-       if (layered && obj_request->result == -ENOENT &&
-                       obj_request->img_offset < rbd_dev->parent_overlap)
-               rbd_img_parent_read(obj_request);
-       else if (img_request)
-               rbd_img_obj_request_read_callback(obj_request);
-       else
-               obj_request_done_set(obj_request);
+       return ceph_file_extents_bytes(obj_req->img_extents,
+                                      obj_req->num_img_extents);
 }
 
-static void rbd_osd_write_callback(struct rbd_obj_request *obj_request)
+static bool rbd_img_is_write(struct rbd_img_request *img_req)
 {
-       dout("%s: obj %p result %d %llu\n", __func__, obj_request,
-               obj_request->result, obj_request->length);
-       /*
-        * There is no such thing as a successful short write.  Set
-        * it to our originally-requested length.
-        */
-       obj_request->xferred = obj_request->length;
-       obj_request_done_set(obj_request);
-}
-
-static void rbd_osd_discard_callback(struct rbd_obj_request *obj_request)
-{
-       dout("%s: obj %p result %d %llu\n", __func__, obj_request,
-               obj_request->result, obj_request->length);
-       /*
-        * There is no such thing as a successful short discard.  Set
-        * it to our originally-requested length.
-        */
-       obj_request->xferred = obj_request->length;
-       /* discarding a non-existent object is not a problem */
-       if (obj_request->result == -ENOENT)
-               obj_request->result = 0;
-       obj_request_done_set(obj_request);
+       switch (img_req->op_type) {
+       case OBJ_OP_READ:
+               return false;
+       case OBJ_OP_WRITE:
+       case OBJ_OP_DISCARD:
+               return true;
+       default:
+               rbd_assert(0);
+       }
 }
 
-/*
- * For a simple stat call there's nothing to do.  We'll do more if
- * this is part of a write sequence for a layered image.
- */
-static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request)
-{
-       dout("%s: obj %p\n", __func__, obj_request);
-       obj_request_done_set(obj_request);
-}
-
-static void rbd_osd_call_callback(struct rbd_obj_request *obj_request)
-{
-       dout("%s: obj %p\n", __func__, obj_request);
-
-       if (obj_request_img_data_test(obj_request))
-               rbd_osd_copyup_callback(obj_request);
-       else
-               obj_request_done_set(obj_request);
-}
+static void rbd_obj_handle_request(struct rbd_obj_request *obj_req);
 
 static void rbd_osd_req_callback(struct ceph_osd_request *osd_req)
 {
-       struct rbd_obj_request *obj_request = osd_req->r_priv;
-       u16 opcode;
+       struct rbd_obj_request *obj_req = osd_req->r_priv;
 
-       dout("%s: osd_req %p\n", __func__, osd_req);
-       rbd_assert(osd_req == obj_request->osd_req);
-       if (obj_request_img_data_test(obj_request)) {
-               rbd_assert(obj_request->img_request);
-               rbd_assert(obj_request->which != BAD_WHICH);
-       } else {
-               rbd_assert(obj_request->which == BAD_WHICH);
-       }
+       dout("%s osd_req %p result %d for obj_req %p\n", __func__, osd_req,
+            osd_req->r_result, obj_req);
+       rbd_assert(osd_req == obj_req->osd_req);
 
-       if (osd_req->r_result < 0)
-               obj_request->result = osd_req->r_result;
-
-       /*
-        * We support a 64-bit length, but ultimately it has to be
-        * passed to the block layer, which just supports a 32-bit
-        * length field.
-        */
-       obj_request->xferred = osd_req->r_ops[0].outdata_len;
-       rbd_assert(obj_request->xferred < (u64)UINT_MAX);
-
-       opcode = osd_req->r_ops[0].op;
-       switch (opcode) {
-       case CEPH_OSD_OP_READ:
-               rbd_osd_read_callback(obj_request);
-               break;
-       case CEPH_OSD_OP_SETALLOCHINT:
-               rbd_assert(osd_req->r_ops[1].op == CEPH_OSD_OP_WRITE ||
-                          osd_req->r_ops[1].op == CEPH_OSD_OP_WRITEFULL);
-               /* fall through */
-       case CEPH_OSD_OP_WRITE:
-       case CEPH_OSD_OP_WRITEFULL:
-               rbd_osd_write_callback(obj_request);
-               break;
-       case CEPH_OSD_OP_STAT:
-               rbd_osd_stat_callback(obj_request);
-               break;
-       case CEPH_OSD_OP_DELETE:
-       case CEPH_OSD_OP_TRUNCATE:
-       case CEPH_OSD_OP_ZERO:
-               rbd_osd_discard_callback(obj_request);
-               break;
-       case CEPH_OSD_OP_CALL:
-               rbd_osd_call_callback(obj_request);
-               break;
-       default:
-               rbd_warn(NULL, "unexpected OSD op: object_no %016llx opcode %d",
-                        obj_request->object_no, opcode);
-               break;
-       }
+       obj_req->result = osd_req->r_result < 0 ? osd_req->r_result : 0;
+       if (!obj_req->result && !rbd_img_is_write(obj_req->img_request))
+               obj_req->xferred = osd_req->r_result;
+       else
+               /*
+                * Writes aren't allowed to return a data payload.  In some
+                * guarded write cases (e.g. stat + zero on an empty object)
+                * a stat response makes it through, but we don't care.
+                */
+               obj_req->xferred = 0;
 
-       if (obj_request_done_test(obj_request))
-               rbd_obj_request_complete(obj_request);
+       rbd_obj_handle_request(obj_req);
 }
 
 static void rbd_osd_req_format_read(struct rbd_obj_request *obj_request)
 {
        struct ceph_osd_request *osd_req = obj_request->osd_req;
 
-       rbd_assert(obj_request_img_data_test(obj_request));
+       osd_req->r_flags = CEPH_OSD_FLAG_READ;
        osd_req->r_snapid = obj_request->img_request->snap_id;
 }
 
@@ -1887,32 +1432,33 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request)
 {
        struct ceph_osd_request *osd_req = obj_request->osd_req;
 
+       osd_req->r_flags = CEPH_OSD_FLAG_WRITE;
        ktime_get_real_ts(&osd_req->r_mtime);
-       osd_req->r_data_offset = obj_request->offset;
+       osd_req->r_data_offset = obj_request->ex.oe_off;
 }
 
 static struct ceph_osd_request *
-__rbd_osd_req_create(struct rbd_device *rbd_dev,
-                    struct ceph_snap_context *snapc,
-                    int num_ops, unsigned int flags,
-                    struct rbd_obj_request *obj_request)
+rbd_osd_req_create(struct rbd_obj_request *obj_req, unsigned int num_ops)
 {
+       struct rbd_img_request *img_req = obj_req->img_request;
+       struct rbd_device *rbd_dev = img_req->rbd_dev;
        struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
        struct ceph_osd_request *req;
        const char *name_format = rbd_dev->image_format == 1 ?
                                      RBD_V1_DATA_FORMAT : RBD_V2_DATA_FORMAT;
 
-       req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false, GFP_NOIO);
+       req = ceph_osdc_alloc_request(osdc,
+                       (rbd_img_is_write(img_req) ? img_req->snapc : NULL),
+                       num_ops, false, GFP_NOIO);
        if (!req)
                return NULL;
 
-       req->r_flags = flags;
        req->r_callback = rbd_osd_req_callback;
-       req->r_priv = obj_request;
+       req->r_priv = obj_req;
 
        req->r_base_oloc.pool = rbd_dev->layout.pool_id;
        if (ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, name_format,
-                       rbd_dev->header.object_prefix, obj_request->object_no))
+                       rbd_dev->header.object_prefix, obj_req->ex.oe_objno))
                goto err_req;
 
        if (ceph_osdc_alloc_messages(req, GFP_NOIO))
@@ -1925,83 +1471,20 @@ err_req:
        return NULL;
 }
 
-/*
- * Create an osd request.  A read request has one osd op (read).
- * A write request has either one (watch) or two (hint+write) osd ops.
- * (All rbd data writes are prefixed with an allocation hint op, but
- * technically osd watch is a write request, hence this distinction.)
- */
-static struct ceph_osd_request *rbd_osd_req_create(
-                                       struct rbd_device *rbd_dev,
-                                       enum obj_operation_type op_type,
-                                       unsigned int num_ops,
-                                       struct rbd_obj_request *obj_request)
-{
-       struct ceph_snap_context *snapc = NULL;
-
-       if (obj_request_img_data_test(obj_request) &&
-               (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_WRITE)) {
-               struct rbd_img_request *img_request = obj_request->img_request;
-               if (op_type == OBJ_OP_WRITE) {
-                       rbd_assert(img_request_write_test(img_request));
-               } else {
-                       rbd_assert(img_request_discard_test(img_request));
-               }
-               snapc = img_request->snapc;
-       }
-
-       rbd_assert(num_ops == 1 || ((op_type == OBJ_OP_WRITE) && num_ops == 2));
-
-       return __rbd_osd_req_create(rbd_dev, snapc, num_ops,
-           (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD) ?
-           CEPH_OSD_FLAG_WRITE : CEPH_OSD_FLAG_READ, obj_request);
-}
-
-/*
- * Create a copyup osd request based on the information in the object
- * request supplied.  A copyup request has two or three osd ops, a
- * copyup method call, potentially a hint op, and a write or truncate
- * or zero op.
- */
-static struct ceph_osd_request *
-rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request)
-{
-       struct rbd_img_request *img_request;
-       int num_osd_ops = 3;
-
-       rbd_assert(obj_request_img_data_test(obj_request));
-       img_request = obj_request->img_request;
-       rbd_assert(img_request);
-       rbd_assert(img_request_write_test(img_request) ||
-                       img_request_discard_test(img_request));
-
-       if (img_request_discard_test(img_request))
-               num_osd_ops = 2;
-
-       return __rbd_osd_req_create(img_request->rbd_dev,
-                                   img_request->snapc, num_osd_ops,
-                                   CEPH_OSD_FLAG_WRITE, obj_request);
-}
-
 static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req)
 {
        ceph_osdc_put_request(osd_req);
 }
 
-static struct rbd_obj_request *
-rbd_obj_request_create(enum obj_request_type type)
+static struct rbd_obj_request *rbd_obj_request_create(void)
 {
        struct rbd_obj_request *obj_request;
 
-       rbd_assert(obj_request_type_valid(type));
-
        obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_NOIO);
        if (!obj_request)
                return NULL;
 
-       obj_request->which = BAD_WHICH;
-       obj_request->type = type;
-       INIT_LIST_HEAD(&obj_request->links);
+       ceph_object_extent_init(&obj_request->ex);
        kref_init(&obj_request->kref);
 
        dout("%s %p\n", __func__, obj_request);
@@ -2011,32 +1494,34 @@ rbd_obj_request_create(enum obj_request_type type)
 static void rbd_obj_request_destroy(struct kref *kref)
 {
        struct rbd_obj_request *obj_request;
+       u32 i;
 
        obj_request = container_of(kref, struct rbd_obj_request, kref);
 
        dout("%s: obj %p\n", __func__, obj_request);
 
-       rbd_assert(obj_request->img_request == NULL);
-       rbd_assert(obj_request->which == BAD_WHICH);
-
        if (obj_request->osd_req)
                rbd_osd_req_destroy(obj_request->osd_req);
 
-       rbd_assert(obj_request_type_valid(obj_request->type));
-       switch (obj_request->type) {
+       switch (obj_request->img_request->data_type) {
        case OBJ_REQUEST_NODATA:
-               break;          /* Nothing to do */
        case OBJ_REQUEST_BIO:
-               if (obj_request->bio_list)
-                       bio_chain_put(obj_request->bio_list);
-               break;
-       case OBJ_REQUEST_PAGES:
-               /* img_data requests don't own their page array */
-               if (obj_request->pages &&
-                   !obj_request_img_data_test(obj_request))
-                       ceph_release_page_vector(obj_request->pages,
-                                               obj_request->page_count);
+       case OBJ_REQUEST_BVECS:
+               break;          /* Nothing to do */
+       case OBJ_REQUEST_OWN_BVECS:
+               kfree(obj_request->bvec_pos.bvecs);
                break;
+       default:
+               rbd_assert(0);
+       }
+
+       kfree(obj_request->img_extents);
+       if (obj_request->copyup_bvecs) {
+               for (i = 0; i < obj_request->copyup_bvec_count; i++) {
+                       if (obj_request->copyup_bvecs[i].bv_page)
+                               __free_page(obj_request->copyup_bvecs[i].bv_page);
+               }
+               kfree(obj_request->copyup_bvecs);
        }
 
        kmem_cache_free(rbd_obj_request_cache, obj_request);
@@ -2111,7 +1596,6 @@ static bool rbd_dev_parent_get(struct rbd_device *rbd_dev)
  */
 static struct rbd_img_request *rbd_img_request_create(
                                        struct rbd_device *rbd_dev,
-                                       u64 offset, u64 length,
                                        enum obj_operation_type op_type,
                                        struct ceph_snap_context *snapc)
 {
@@ -2122,27 +1606,21 @@ static struct rbd_img_request *rbd_img_request_create(
                return NULL;
 
        img_request->rbd_dev = rbd_dev;
-       img_request->offset = offset;
-       img_request->length = length;
-       if (op_type == OBJ_OP_DISCARD) {
-               img_request_discard_set(img_request);
-               img_request->snapc = snapc;
-       } else if (op_type == OBJ_OP_WRITE) {
-               img_request_write_set(img_request);
-               img_request->snapc = snapc;
-       } else {
+       img_request->op_type = op_type;
+       if (!rbd_img_is_write(img_request))
                img_request->snap_id = rbd_dev->spec->snap_id;
-       }
+       else
+               img_request->snapc = snapc;
+
        if (rbd_dev_parent_get(rbd_dev))
                img_request_layered_set(img_request);
 
        spin_lock_init(&img_request->completion_lock);
-       INIT_LIST_HEAD(&img_request->obj_requests);
+       INIT_LIST_HEAD(&img_request->object_extents);
        kref_init(&img_request->kref);
 
-       dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev,
-               obj_op_name(op_type), offset, length, img_request);
-
+       dout("%s: rbd_dev %p %s -> img %p\n", __func__, rbd_dev,
+            obj_op_name(op_type), img_request);
        return img_request;
 }
 
@@ -2165,829 +1643,934 @@ static void rbd_img_request_destroy(struct kref *kref)
                rbd_dev_parent_put(img_request->rbd_dev);
        }
 
-       if (img_request_write_test(img_request) ||
-               img_request_discard_test(img_request))
+       if (rbd_img_is_write(img_request))
                ceph_put_snap_context(img_request->snapc);
 
        kmem_cache_free(rbd_img_request_cache, img_request);
 }
 
-static struct rbd_img_request *rbd_parent_request_create(
-                                       struct rbd_obj_request *obj_request,
-                                       u64 img_offset, u64 length)
+static void prune_extents(struct ceph_file_extent *img_extents,
+                         u32 *num_img_extents, u64 overlap)
 {
-       struct rbd_img_request *parent_request;
-       struct rbd_device *rbd_dev;
+       u32 cnt = *num_img_extents;
 
-       rbd_assert(obj_request->img_request);
-       rbd_dev = obj_request->img_request->rbd_dev;
+       /* drop extents completely beyond the overlap */
+       while (cnt && img_extents[cnt - 1].fe_off >= overlap)
+               cnt--;
 
-       parent_request = rbd_img_request_create(rbd_dev->parent, img_offset,
-                                               length, OBJ_OP_READ, NULL);
-       if (!parent_request)
-               return NULL;
+       if (cnt) {
+               struct ceph_file_extent *ex = &img_extents[cnt - 1];
 
-       img_request_child_set(parent_request);
-       rbd_obj_request_get(obj_request);
-       parent_request->obj_request = obj_request;
+               /* trim final overlapping extent */
+               if (ex->fe_off + ex->fe_len > overlap)
+                       ex->fe_len = overlap - ex->fe_off;
+       }
 
-       return parent_request;
+       *num_img_extents = cnt;
 }
 
-static void rbd_parent_request_destroy(struct kref *kref)
+/*
+ * Determine the byte range(s) covered by either just the object extent
+ * or the entire object in the parent image.
+ */
+static int rbd_obj_calc_img_extents(struct rbd_obj_request *obj_req,
+                                   bool entire)
 {
-       struct rbd_img_request *parent_request;
-       struct rbd_obj_request *orig_request;
+       struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+       int ret;
 
-       parent_request = container_of(kref, struct rbd_img_request, kref);
-       orig_request = parent_request->obj_request;
+       if (!rbd_dev->parent_overlap)
+               return 0;
 
-       parent_request->obj_request = NULL;
-       rbd_obj_request_put(orig_request);
-       img_request_child_clear(parent_request);
+       ret = ceph_extent_to_file(&rbd_dev->layout, obj_req->ex.oe_objno,
+                                 entire ? 0 : obj_req->ex.oe_off,
+                                 entire ? rbd_dev->layout.object_size :
+                                                       obj_req->ex.oe_len,
+                                 &obj_req->img_extents,
+                                 &obj_req->num_img_extents);
+       if (ret)
+               return ret;
 
-       rbd_img_request_destroy(kref);
+       prune_extents(obj_req->img_extents, &obj_req->num_img_extents,
+                     rbd_dev->parent_overlap);
+       return 0;
 }
 
-static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
+static void rbd_osd_req_setup_data(struct rbd_obj_request *obj_req, u32 which)
 {
-       struct rbd_img_request *img_request;
-       unsigned int xferred;
-       int result;
-       bool more;
-
-       rbd_assert(obj_request_img_data_test(obj_request));
-       img_request = obj_request->img_request;
-
-       rbd_assert(obj_request->xferred <= (u64)UINT_MAX);
-       xferred = (unsigned int)obj_request->xferred;
-       result = obj_request->result;
-       if (result) {
-               struct rbd_device *rbd_dev = img_request->rbd_dev;
-               enum obj_operation_type op_type;
-
-               if (img_request_discard_test(img_request))
-                       op_type = OBJ_OP_DISCARD;
-               else if (img_request_write_test(img_request))
-                       op_type = OBJ_OP_WRITE;
-               else
-                       op_type = OBJ_OP_READ;
-
-               rbd_warn(rbd_dev, "%s %llx at %llx (%llx)",
-                       obj_op_name(op_type), obj_request->length,
-                       obj_request->img_offset, obj_request->offset);
-               rbd_warn(rbd_dev, "  result %d xferred %x",
-                       result, xferred);
-               if (!img_request->result)
-                       img_request->result = result;
-               /*
-                * Need to end I/O on the entire obj_request worth of
-                * bytes in case of error.
-                */
-               xferred = obj_request->length;
+       switch (obj_req->img_request->data_type) {
+       case OBJ_REQUEST_BIO:
+               osd_req_op_extent_osd_data_bio(obj_req->osd_req, which,
+                                              &obj_req->bio_pos,
+                                              obj_req->ex.oe_len);
+               break;
+       case OBJ_REQUEST_BVECS:
+       case OBJ_REQUEST_OWN_BVECS:
+               rbd_assert(obj_req->bvec_pos.iter.bi_size ==
+                                                       obj_req->ex.oe_len);
+               rbd_assert(obj_req->bvec_idx == obj_req->bvec_count);
+               osd_req_op_extent_osd_data_bvec_pos(obj_req->osd_req, which,
+                                                   &obj_req->bvec_pos);
+               break;
+       default:
+               rbd_assert(0);
        }
+}
 
-       if (img_request_child_test(img_request)) {
-               rbd_assert(img_request->obj_request != NULL);
-               more = obj_request->which < img_request->obj_request_count - 1;
-       } else {
-               blk_status_t status = errno_to_blk_status(result);
+static int rbd_obj_setup_read(struct rbd_obj_request *obj_req)
+{
+       obj_req->osd_req = rbd_osd_req_create(obj_req, 1);
+       if (!obj_req->osd_req)
+               return -ENOMEM;
 
-               rbd_assert(img_request->rq != NULL);
+       osd_req_op_extent_init(obj_req->osd_req, 0, CEPH_OSD_OP_READ,
+                              obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0);
+       rbd_osd_req_setup_data(obj_req, 0);
 
-               more = blk_update_request(img_request->rq, status, xferred);
-               if (!more)
-                       __blk_mq_end_request(img_request->rq, status);
-       }
+       rbd_osd_req_format_read(obj_req);
+       return 0;
+}
+
+static int __rbd_obj_setup_stat(struct rbd_obj_request *obj_req,
+                               unsigned int which)
+{
+       struct page **pages;
 
-       return more;
+       /*
+        * The response data for a STAT call consists of:
+        *     le64 length;
+        *     struct {
+        *         le32 tv_sec;
+        *         le32 tv_nsec;
+        *     } mtime;
+        */
+       pages = ceph_alloc_page_vector(1, GFP_NOIO);
+       if (IS_ERR(pages))
+               return PTR_ERR(pages);
+
+       osd_req_op_init(obj_req->osd_req, which, CEPH_OSD_OP_STAT, 0);
+       osd_req_op_raw_data_in_pages(obj_req->osd_req, which, pages,
+                                    8 + sizeof(struct ceph_timespec),
+                                    0, false, true);
+       return 0;
 }
 
-static void rbd_img_obj_callback(struct rbd_obj_request *obj_request)
+static void __rbd_obj_setup_write(struct rbd_obj_request *obj_req,
+                                 unsigned int which)
 {
-       struct rbd_img_request *img_request;
-       u32 which = obj_request->which;
-       bool more = true;
+       struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+       u16 opcode;
 
-       rbd_assert(obj_request_img_data_test(obj_request));
-       img_request = obj_request->img_request;
+       osd_req_op_alloc_hint_init(obj_req->osd_req, which++,
+                                  rbd_dev->layout.object_size,
+                                  rbd_dev->layout.object_size);
 
-       dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
-       rbd_assert(img_request != NULL);
-       rbd_assert(img_request->obj_request_count > 0);
-       rbd_assert(which != BAD_WHICH);
-       rbd_assert(which < img_request->obj_request_count);
+       if (rbd_obj_is_entire(obj_req))
+               opcode = CEPH_OSD_OP_WRITEFULL;
+       else
+               opcode = CEPH_OSD_OP_WRITE;
 
-       spin_lock_irq(&img_request->completion_lock);
-       if (which != img_request->next_completion)
-               goto out;
+       osd_req_op_extent_init(obj_req->osd_req, which, opcode,
+                              obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0);
+       rbd_osd_req_setup_data(obj_req, which++);
 
-       for_each_obj_request_from(img_request, obj_request) {
-               rbd_assert(more);
-               rbd_assert(which < img_request->obj_request_count);
+       rbd_assert(which == obj_req->osd_req->r_num_ops);
+       rbd_osd_req_format_write(obj_req);
+}
 
-               if (!obj_request_done_test(obj_request))
-                       break;
-               more = rbd_img_obj_end_request(obj_request);
-               which++;
+static int rbd_obj_setup_write(struct rbd_obj_request *obj_req)
+{
+       unsigned int num_osd_ops, which = 0;
+       int ret;
+
+       /* reverse map the entire object onto the parent */
+       ret = rbd_obj_calc_img_extents(obj_req, true);
+       if (ret)
+               return ret;
+
+       if (obj_req->num_img_extents) {
+               obj_req->write_state = RBD_OBJ_WRITE_GUARD;
+               num_osd_ops = 3; /* stat + setallochint + write/writefull */
+       } else {
+               obj_req->write_state = RBD_OBJ_WRITE_FLAT;
+               num_osd_ops = 2; /* setallochint + write/writefull */
        }
 
-       rbd_assert(more ^ (which == img_request->obj_request_count));
-       img_request->next_completion = which;
-out:
-       spin_unlock_irq(&img_request->completion_lock);
-       rbd_img_request_put(img_request);
+       obj_req->osd_req = rbd_osd_req_create(obj_req, num_osd_ops);
+       if (!obj_req->osd_req)
+               return -ENOMEM;
 
-       if (!more)
-               rbd_img_request_complete(img_request);
+       if (obj_req->num_img_extents) {
+               ret = __rbd_obj_setup_stat(obj_req, which++);
+               if (ret)
+                       return ret;
+       }
+
+       __rbd_obj_setup_write(obj_req, which);
+       return 0;
 }
 
-/*
- * Add individual osd ops to the given ceph_osd_request and prepare
- * them for submission. num_ops is the current number of
- * osd operations already to the object request.
- */
-static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request,
-                               struct ceph_osd_request *osd_request,
-                               enum obj_operation_type op_type,
-                               unsigned int num_ops)
-{
-       struct rbd_img_request *img_request = obj_request->img_request;
-       struct rbd_device *rbd_dev = img_request->rbd_dev;
-       u64 object_size = rbd_obj_bytes(&rbd_dev->header);
-       u64 offset = obj_request->offset;
-       u64 length = obj_request->length;
-       u64 img_end;
+static void __rbd_obj_setup_discard(struct rbd_obj_request *obj_req,
+                                   unsigned int which)
+{
        u16 opcode;
 
-       if (op_type == OBJ_OP_DISCARD) {
-               if (!offset && length == object_size &&
-                   (!img_request_layered_test(img_request) ||
-                    !obj_request_overlaps_parent(obj_request))) {
-                       opcode = CEPH_OSD_OP_DELETE;
-               } else if ((offset + length == object_size)) {
+       if (rbd_obj_is_entire(obj_req)) {
+               if (obj_req->num_img_extents) {
+                       osd_req_op_init(obj_req->osd_req, which++,
+                                       CEPH_OSD_OP_CREATE, 0);
                        opcode = CEPH_OSD_OP_TRUNCATE;
                } else {
-                       down_read(&rbd_dev->header_rwsem);
-                       img_end = rbd_dev->header.image_size;
-                       up_read(&rbd_dev->header_rwsem);
-
-                       if (obj_request->img_offset + length == img_end)
-                               opcode = CEPH_OSD_OP_TRUNCATE;
-                       else
-                               opcode = CEPH_OSD_OP_ZERO;
+                       osd_req_op_init(obj_req->osd_req, which++,
+                                       CEPH_OSD_OP_DELETE, 0);
+                       opcode = 0;
                }
-       } else if (op_type == OBJ_OP_WRITE) {
-               if (!offset && length == object_size)
-                       opcode = CEPH_OSD_OP_WRITEFULL;
-               else
-                       opcode = CEPH_OSD_OP_WRITE;
-               osd_req_op_alloc_hint_init(osd_request, num_ops,
-                                       object_size, object_size);
-               num_ops++;
+       } else if (rbd_obj_is_tail(obj_req)) {
+               opcode = CEPH_OSD_OP_TRUNCATE;
        } else {
-               opcode = CEPH_OSD_OP_READ;
+               opcode = CEPH_OSD_OP_ZERO;
        }
 
-       if (opcode == CEPH_OSD_OP_DELETE)
-               osd_req_op_init(osd_request, num_ops, opcode, 0);
-       else
-               osd_req_op_extent_init(osd_request, num_ops, opcode,
-                                      offset, length, 0, 0);
-
-       if (obj_request->type == OBJ_REQUEST_BIO)
-               osd_req_op_extent_osd_data_bio(osd_request, num_ops,
-                                       obj_request->bio_list, length);
-       else if (obj_request->type == OBJ_REQUEST_PAGES)
-               osd_req_op_extent_osd_data_pages(osd_request, num_ops,
-                                       obj_request->pages, length,
-                                       offset & ~PAGE_MASK, false, false);
-
-       /* Discards are also writes */
-       if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD)
-               rbd_osd_req_format_write(obj_request);
-       else
-               rbd_osd_req_format_read(obj_request);
+       if (opcode)
+               osd_req_op_extent_init(obj_req->osd_req, which++, opcode,
+                                      obj_req->ex.oe_off, obj_req->ex.oe_len,
+                                      0, 0);
+
+       rbd_assert(which == obj_req->osd_req->r_num_ops);
+       rbd_osd_req_format_write(obj_req);
 }
 
-/*
- * Split up an image request into one or more object requests, each
- * to a different object.  The "type" parameter indicates whether
- * "data_desc" is the pointer to the head of a list of bio
- * structures, or the base of a page array.  In either case this
- * function assumes data_desc describes memory sufficient to hold
- * all data described by the image request.
- */
-static int rbd_img_request_fill(struct rbd_img_request *img_request,
-                                       enum obj_request_type type,
-                                       void *data_desc)
+static int rbd_obj_setup_discard(struct rbd_obj_request *obj_req)
 {
-       struct rbd_device *rbd_dev = img_request->rbd_dev;
-       struct rbd_obj_request *obj_request = NULL;
-       struct rbd_obj_request *next_obj_request;
-       struct bio *bio_list = NULL;
-       unsigned int bio_offset = 0;
-       struct page **pages = NULL;
-       enum obj_operation_type op_type;
-       u64 img_offset;
-       u64 resid;
-
-       dout("%s: img %p type %d data_desc %p\n", __func__, img_request,
-               (int)type, data_desc);
+       unsigned int num_osd_ops, which = 0;
+       int ret;
 
-       img_offset = img_request->offset;
-       resid = img_request->length;
-       rbd_assert(resid > 0);
-       op_type = rbd_img_request_op_type(img_request);
+       /* reverse map the entire object onto the parent */
+       ret = rbd_obj_calc_img_extents(obj_req, true);
+       if (ret)
+               return ret;
 
-       if (type == OBJ_REQUEST_BIO) {
-               bio_list = data_desc;
-               rbd_assert(img_offset ==
-                          bio_list->bi_iter.bi_sector << SECTOR_SHIFT);
-       } else if (type == OBJ_REQUEST_PAGES) {
-               pages = data_desc;
+       if (rbd_obj_is_entire(obj_req)) {
+               obj_req->write_state = RBD_OBJ_WRITE_FLAT;
+               if (obj_req->num_img_extents)
+                       num_osd_ops = 2; /* create + truncate */
+               else
+                       num_osd_ops = 1; /* delete */
+       } else {
+               if (obj_req->num_img_extents) {
+                       obj_req->write_state = RBD_OBJ_WRITE_GUARD;
+                       num_osd_ops = 2; /* stat + truncate/zero */
+               } else {
+                       obj_req->write_state = RBD_OBJ_WRITE_FLAT;
+                       num_osd_ops = 1; /* truncate/zero */
+               }
        }
 
-       while (resid) {
-               struct ceph_osd_request *osd_req;
-               u64 object_no = img_offset >> rbd_dev->header.obj_order;
-               u64 offset = rbd_segment_offset(rbd_dev, img_offset);
-               u64 length = rbd_segment_length(rbd_dev, img_offset, resid);
-
-               obj_request = rbd_obj_request_create(type);
-               if (!obj_request)
-                       goto out_unwind;
-
-               obj_request->object_no = object_no;
-               obj_request->offset = offset;
-               obj_request->length = length;
-
-               /*
-                * set obj_request->img_request before creating the
-                * osd_request so that it gets the right snapc
-                */
-               rbd_img_obj_request_add(img_request, obj_request);
-
-               if (type == OBJ_REQUEST_BIO) {
-                       unsigned int clone_size;
-
-                       rbd_assert(length <= (u64)UINT_MAX);
-                       clone_size = (unsigned int)length;
-                       obj_request->bio_list =
-                                       bio_chain_clone_range(&bio_list,
-                                                               &bio_offset,
-                                                               clone_size,
-                                                               GFP_NOIO);
-                       if (!obj_request->bio_list)
-                               goto out_unwind;
-               } else if (type == OBJ_REQUEST_PAGES) {
-                       unsigned int page_count;
-
-                       obj_request->pages = pages;
-                       page_count = (u32)calc_pages_for(offset, length);
-                       obj_request->page_count = page_count;
-                       if ((offset + length) & ~PAGE_MASK)
-                               page_count--;   /* more on last page */
-                       pages += page_count;
-               }
+       obj_req->osd_req = rbd_osd_req_create(obj_req, num_osd_ops);
+       if (!obj_req->osd_req)
+               return -ENOMEM;
 
-               osd_req = rbd_osd_req_create(rbd_dev, op_type,
-                                       (op_type == OBJ_OP_WRITE) ? 2 : 1,
-                                       obj_request);
-               if (!osd_req)
-                       goto out_unwind;
+       if (!rbd_obj_is_entire(obj_req) && obj_req->num_img_extents) {
+               ret = __rbd_obj_setup_stat(obj_req, which++);
+               if (ret)
+                       return ret;
+       }
 
-               obj_request->osd_req = osd_req;
-               obj_request->callback = rbd_img_obj_callback;
-               obj_request->img_offset = img_offset;
+       __rbd_obj_setup_discard(obj_req, which);
+       return 0;
+}
 
-               rbd_img_obj_request_fill(obj_request, osd_req, op_type, 0);
+/*
+ * For each object request in @img_req, allocate an OSD request, add
+ * individual OSD ops and prepare them for submission.  The number of
+ * OSD ops depends on op_type and the overlap point (if any).
+ */
+static int __rbd_img_fill_request(struct rbd_img_request *img_req)
+{
+       struct rbd_obj_request *obj_req;
+       int ret;
 
-               img_offset += length;
-               resid -= length;
+       for_each_obj_request(img_req, obj_req) {
+               switch (img_req->op_type) {
+               case OBJ_OP_READ:
+                       ret = rbd_obj_setup_read(obj_req);
+                       break;
+               case OBJ_OP_WRITE:
+                       ret = rbd_obj_setup_write(obj_req);
+                       break;
+               case OBJ_OP_DISCARD:
+                       ret = rbd_obj_setup_discard(obj_req);
+                       break;
+               default:
+                       rbd_assert(0);
+               }
+               if (ret)
+                       return ret;
        }
 
        return 0;
+}
 
-out_unwind:
-       for_each_obj_request_safe(img_request, obj_request, next_obj_request)
-               rbd_img_obj_request_del(img_request, obj_request);
+union rbd_img_fill_iter {
+       struct ceph_bio_iter    bio_iter;
+       struct ceph_bvec_iter   bvec_iter;
+};
 
-       return -ENOMEM;
-}
+struct rbd_img_fill_ctx {
+       enum obj_request_type   pos_type;
+       union rbd_img_fill_iter *pos;
+       union rbd_img_fill_iter iter;
+       ceph_object_extent_fn_t set_pos_fn;
+       ceph_object_extent_fn_t count_fn;
+       ceph_object_extent_fn_t copy_fn;
+};
 
-static void
-rbd_osd_copyup_callback(struct rbd_obj_request *obj_request)
+static struct ceph_object_extent *alloc_object_extent(void *arg)
 {
-       struct rbd_img_request *img_request;
-       struct rbd_device *rbd_dev;
-       struct page **pages;
-       u32 page_count;
+       struct rbd_img_request *img_req = arg;
+       struct rbd_obj_request *obj_req;
 
-       dout("%s: obj %p\n", __func__, obj_request);
+       obj_req = rbd_obj_request_create();
+       if (!obj_req)
+               return NULL;
 
-       rbd_assert(obj_request->type == OBJ_REQUEST_BIO ||
-               obj_request->type == OBJ_REQUEST_NODATA);
-       rbd_assert(obj_request_img_data_test(obj_request));
-       img_request = obj_request->img_request;
-       rbd_assert(img_request);
+       rbd_img_obj_request_add(img_req, obj_req);
+       return &obj_req->ex;
+}
 
-       rbd_dev = img_request->rbd_dev;
-       rbd_assert(rbd_dev);
+/*
+ * While su != os && sc == 1 is technically not fancy (it's the same
+ * layout as su == os && sc == 1), we can't use the nocopy path for it
+ * because ->set_pos_fn() should be called only once per object.
+ * ceph_file_to_extents() invokes action_fn once per stripe unit, so
+ * treat su != os && sc == 1 as fancy.
+ */
+static bool rbd_layout_is_fancy(struct ceph_file_layout *l)
+{
+       return l->stripe_unit != l->object_size;
+}
 
-       pages = obj_request->copyup_pages;
-       rbd_assert(pages != NULL);
-       obj_request->copyup_pages = NULL;
-       page_count = obj_request->copyup_page_count;
-       rbd_assert(page_count);
-       obj_request->copyup_page_count = 0;
-       ceph_release_page_vector(pages, page_count);
+static int rbd_img_fill_request_nocopy(struct rbd_img_request *img_req,
+                                      struct ceph_file_extent *img_extents,
+                                      u32 num_img_extents,
+                                      struct rbd_img_fill_ctx *fctx)
+{
+       u32 i;
+       int ret;
+
+       img_req->data_type = fctx->pos_type;
 
        /*
-        * We want the transfer count to reflect the size of the
-        * original write request.  There is no such thing as a
-        * successful short write, so if the request was successful
-        * we can just set it to the originally-requested length.
+        * Create object requests and set each object request's starting
+        * position in the provided bio (list) or bio_vec array.
         */
-       if (!obj_request->result)
-               obj_request->xferred = obj_request->length;
+       fctx->iter = *fctx->pos;
+       for (i = 0; i < num_img_extents; i++) {
+               ret = ceph_file_to_extents(&img_req->rbd_dev->layout,
+                                          img_extents[i].fe_off,
+                                          img_extents[i].fe_len,
+                                          &img_req->object_extents,
+                                          alloc_object_extent, img_req,
+                                          fctx->set_pos_fn, &fctx->iter);
+               if (ret)
+                       return ret;
+       }
 
-       obj_request_done_set(obj_request);
+       return __rbd_img_fill_request(img_req);
 }
 
-static void
-rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
+/*
+ * Map a list of image extents to a list of object extents, create the
+ * corresponding object requests (normally each to a different object,
+ * but not always) and add them to @img_req.  For each object request,
+ * set up its data descriptor to point to the corresponding chunk(s) of
+ * @fctx->pos data buffer.
+ *
+ * Because ceph_file_to_extents() will merge adjacent object extents
+ * together, each object request's data descriptor may point to multiple
+ * different chunks of @fctx->pos data buffer.
+ *
+ * @fctx->pos data buffer is assumed to be large enough.
+ */
+static int rbd_img_fill_request(struct rbd_img_request *img_req,
+                               struct ceph_file_extent *img_extents,
+                               u32 num_img_extents,
+                               struct rbd_img_fill_ctx *fctx)
 {
-       struct rbd_obj_request *orig_request;
-       struct ceph_osd_request *osd_req;
-       struct rbd_device *rbd_dev;
-       struct page **pages;
-       enum obj_operation_type op_type;
-       u32 page_count;
-       int img_result;
-       u64 parent_length;
-
-       rbd_assert(img_request_child_test(img_request));
-
-       /* First get what we need from the image request */
-
-       pages = img_request->copyup_pages;
-       rbd_assert(pages != NULL);
-       img_request->copyup_pages = NULL;
-       page_count = img_request->copyup_page_count;
-       rbd_assert(page_count);
-       img_request->copyup_page_count = 0;
-
-       orig_request = img_request->obj_request;
-       rbd_assert(orig_request != NULL);
-       rbd_assert(obj_request_type_valid(orig_request->type));
-       img_result = img_request->result;
-       parent_length = img_request->length;
-       rbd_assert(img_result || parent_length == img_request->xferred);
-       rbd_img_request_put(img_request);
+       struct rbd_device *rbd_dev = img_req->rbd_dev;
+       struct rbd_obj_request *obj_req;
+       u32 i;
+       int ret;
+
+       if (fctx->pos_type == OBJ_REQUEST_NODATA ||
+           !rbd_layout_is_fancy(&rbd_dev->layout))
+               return rbd_img_fill_request_nocopy(img_req, img_extents,
+                                                  num_img_extents, fctx);
 
-       rbd_assert(orig_request->img_request);
-       rbd_dev = orig_request->img_request->rbd_dev;
-       rbd_assert(rbd_dev);
+       img_req->data_type = OBJ_REQUEST_OWN_BVECS;
 
        /*
-        * If the overlap has become 0 (most likely because the
-        * image has been flattened) we need to free the pages
-        * and re-submit the original write request.
+        * Create object requests and determine ->bvec_count for each object
+        * request.  Note that ->bvec_count sum over all object requests may
+        * be greater than the number of bio_vecs in the provided bio (list)
+        * or bio_vec array because when mapped, those bio_vecs can straddle
+        * stripe unit boundaries.
         */
-       if (!rbd_dev->parent_overlap) {
-               ceph_release_page_vector(pages, page_count);
-               rbd_obj_request_submit(orig_request);
-               return;
+       fctx->iter = *fctx->pos;
+       for (i = 0; i < num_img_extents; i++) {
+               ret = ceph_file_to_extents(&rbd_dev->layout,
+                                          img_extents[i].fe_off,
+                                          img_extents[i].fe_len,
+                                          &img_req->object_extents,
+                                          alloc_object_extent, img_req,
+                                          fctx->count_fn, &fctx->iter);
+               if (ret)
+                       return ret;
        }
 
-       if (img_result)
-               goto out_err;
+       for_each_obj_request(img_req, obj_req) {
+               obj_req->bvec_pos.bvecs = kmalloc_array(obj_req->bvec_count,
+                                             sizeof(*obj_req->bvec_pos.bvecs),
+                                             GFP_NOIO);
+               if (!obj_req->bvec_pos.bvecs)
+                       return -ENOMEM;
+       }
 
        /*
-        * The original osd request is of no use to use any more.
-        * We need a new one that can hold the three ops in a copyup
-        * request.  Allocate the new copyup osd request for the
-        * original request, and release the old one.
+        * Fill in each object request's private bio_vec array, splitting and
+        * rearranging the provided bio_vecs in stripe unit chunks as needed.
         */
-       img_result = -ENOMEM;
-       osd_req = rbd_osd_req_create_copyup(orig_request);
-       if (!osd_req)
-               goto out_err;
-       rbd_osd_req_destroy(orig_request->osd_req);
-       orig_request->osd_req = osd_req;
-       orig_request->copyup_pages = pages;
-       orig_request->copyup_page_count = page_count;
+       fctx->iter = *fctx->pos;
+       for (i = 0; i < num_img_extents; i++) {
+               ret = ceph_iterate_extents(&rbd_dev->layout,
+                                          img_extents[i].fe_off,
+                                          img_extents[i].fe_len,
+                                          &img_req->object_extents,
+                                          fctx->copy_fn, &fctx->iter);
+               if (ret)
+                       return ret;
+       }
 
-       /* Initialize the copyup op */
+       return __rbd_img_fill_request(img_req);
+}
+
+static int rbd_img_fill_nodata(struct rbd_img_request *img_req,
+                              u64 off, u64 len)
+{
+       struct ceph_file_extent ex = { off, len };
+       union rbd_img_fill_iter dummy;
+       struct rbd_img_fill_ctx fctx = {
+               .pos_type = OBJ_REQUEST_NODATA,
+               .pos = &dummy,
+       };
+
+       return rbd_img_fill_request(img_req, &ex, 1, &fctx);
+}
+
+static void set_bio_pos(struct ceph_object_extent *ex, u32 bytes, void *arg)
+{
+       struct rbd_obj_request *obj_req =
+           container_of(ex, struct rbd_obj_request, ex);
+       struct ceph_bio_iter *it = arg;
 
-       osd_req_op_cls_init(osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup");
-       osd_req_op_cls_request_data_pages(osd_req, 0, pages, parent_length, 0,
-                                               false, false);
+       dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);
+       obj_req->bio_pos = *it;
+       ceph_bio_iter_advance(it, bytes);
+}
 
-       /* Add the other op(s) */
+static void count_bio_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
+{
+       struct rbd_obj_request *obj_req =
+           container_of(ex, struct rbd_obj_request, ex);
+       struct ceph_bio_iter *it = arg;
 
-       op_type = rbd_img_request_op_type(orig_request->img_request);
-       rbd_img_obj_request_fill(orig_request, osd_req, op_type, 1);
+       dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);
+       ceph_bio_iter_advance_step(it, bytes, ({
+               obj_req->bvec_count++;
+       }));
 
-       /* All set, send it off. */
+}
 
-       rbd_obj_request_submit(orig_request);
-       return;
+static void copy_bio_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
+{
+       struct rbd_obj_request *obj_req =
+           container_of(ex, struct rbd_obj_request, ex);
+       struct ceph_bio_iter *it = arg;
 
-out_err:
-       ceph_release_page_vector(pages, page_count);
-       rbd_obj_request_error(orig_request, img_result);
+       dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);
+       ceph_bio_iter_advance_step(it, bytes, ({
+               obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv;
+               obj_req->bvec_pos.iter.bi_size += bv.bv_len;
+       }));
 }
 
-/*
- * Read from the parent image the range of data that covers the
- * entire target of the given object request.  This is used for
- * satisfying a layered image write request when the target of an
- * object request from the image request does not exist.
- *
- * A page array big enough to hold the returned data is allocated
- * and supplied to rbd_img_request_fill() as the "data descriptor."
- * When the read completes, this page array will be transferred to
- * the original object request for the copyup operation.
- *
- * If an error occurs, it is recorded as the result of the original
- * object request in rbd_img_obj_exists_callback().
- */
-static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
-{
-       struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
-       struct rbd_img_request *parent_request = NULL;
-       u64 img_offset;
-       u64 length;
-       struct page **pages = NULL;
-       u32 page_count;
-       int result;
+static int __rbd_img_fill_from_bio(struct rbd_img_request *img_req,
+                                  struct ceph_file_extent *img_extents,
+                                  u32 num_img_extents,
+                                  struct ceph_bio_iter *bio_pos)
+{
+       struct rbd_img_fill_ctx fctx = {
+               .pos_type = OBJ_REQUEST_BIO,
+               .pos = (union rbd_img_fill_iter *)bio_pos,
+               .set_pos_fn = set_bio_pos,
+               .count_fn = count_bio_bvecs,
+               .copy_fn = copy_bio_bvecs,
+       };
 
-       rbd_assert(rbd_dev->parent != NULL);
+       return rbd_img_fill_request(img_req, img_extents, num_img_extents,
+                                   &fctx);
+}
 
-       /*
-        * Determine the byte range covered by the object in the
-        * child image to which the original request was to be sent.
-        */
-       img_offset = obj_request->img_offset - obj_request->offset;
-       length = rbd_obj_bytes(&rbd_dev->header);
+static int rbd_img_fill_from_bio(struct rbd_img_request *img_req,
+                                u64 off, u64 len, struct bio *bio)
+{
+       struct ceph_file_extent ex = { off, len };
+       struct ceph_bio_iter it = { .bio = bio, .iter = bio->bi_iter };
 
-       /*
-        * There is no defined parent data beyond the parent
-        * overlap, so limit what we read at that boundary if
-        * necessary.
-        */
-       if (img_offset + length > rbd_dev->parent_overlap) {
-               rbd_assert(img_offset < rbd_dev->parent_overlap);
-               length = rbd_dev->parent_overlap - img_offset;
-       }
+       return __rbd_img_fill_from_bio(img_req, &ex, 1, &it);
+}
 
-       /*
-        * Allocate a page array big enough to receive the data read
-        * from the parent.
-        */
-       page_count = (u32)calc_pages_for(0, length);
-       pages = ceph_alloc_page_vector(page_count, GFP_NOIO);
-       if (IS_ERR(pages)) {
-               result = PTR_ERR(pages);
-               pages = NULL;
-               goto out_err;
-       }
+static void set_bvec_pos(struct ceph_object_extent *ex, u32 bytes, void *arg)
+{
+       struct rbd_obj_request *obj_req =
+           container_of(ex, struct rbd_obj_request, ex);
+       struct ceph_bvec_iter *it = arg;
 
-       result = -ENOMEM;
-       parent_request = rbd_parent_request_create(obj_request,
-                                               img_offset, length);
-       if (!parent_request)
-               goto out_err;
+       obj_req->bvec_pos = *it;
+       ceph_bvec_iter_shorten(&obj_req->bvec_pos, bytes);
+       ceph_bvec_iter_advance(it, bytes);
+}
 
-       result = rbd_img_request_fill(parent_request, OBJ_REQUEST_PAGES, pages);
-       if (result)
-               goto out_err;
+static void count_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
+{
+       struct rbd_obj_request *obj_req =
+           container_of(ex, struct rbd_obj_request, ex);
+       struct ceph_bvec_iter *it = arg;
 
-       parent_request->copyup_pages = pages;
-       parent_request->copyup_page_count = page_count;
-       parent_request->callback = rbd_img_obj_parent_read_full_callback;
+       ceph_bvec_iter_advance_step(it, bytes, ({
+               obj_req->bvec_count++;
+       }));
+}
 
-       result = rbd_img_request_submit(parent_request);
-       if (!result)
-               return 0;
+static void copy_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
+{
+       struct rbd_obj_request *obj_req =
+           container_of(ex, struct rbd_obj_request, ex);
+       struct ceph_bvec_iter *it = arg;
 
-       parent_request->copyup_pages = NULL;
-       parent_request->copyup_page_count = 0;
-out_err:
-       if (pages)
-               ceph_release_page_vector(pages, page_count);
-       if (parent_request)
-               rbd_img_request_put(parent_request);
-       return result;
+       ceph_bvec_iter_advance_step(it, bytes, ({
+               obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv;
+               obj_req->bvec_pos.iter.bi_size += bv.bv_len;
+       }));
 }
 
-static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request)
+static int __rbd_img_fill_from_bvecs(struct rbd_img_request *img_req,
+                                    struct ceph_file_extent *img_extents,
+                                    u32 num_img_extents,
+                                    struct ceph_bvec_iter *bvec_pos)
 {
-       struct rbd_obj_request *orig_request;
-       struct rbd_device *rbd_dev;
-       int result;
+       struct rbd_img_fill_ctx fctx = {
+               .pos_type = OBJ_REQUEST_BVECS,
+               .pos = (union rbd_img_fill_iter *)bvec_pos,
+               .set_pos_fn = set_bvec_pos,
+               .count_fn = count_bvecs,
+               .copy_fn = copy_bvecs,
+       };
 
-       rbd_assert(!obj_request_img_data_test(obj_request));
+       return rbd_img_fill_request(img_req, img_extents, num_img_extents,
+                                   &fctx);
+}
 
-       /*
-        * All we need from the object request is the original
-        * request and the result of the STAT op.  Grab those, then
-        * we're done with the request.
-        */
-       orig_request = obj_request->obj_request;
-       obj_request->obj_request = NULL;
-       rbd_obj_request_put(orig_request);
-       rbd_assert(orig_request);
-       rbd_assert(orig_request->img_request);
-
-       result = obj_request->result;
-       obj_request->result = 0;
-
-       dout("%s: obj %p for obj %p result %d %llu/%llu\n", __func__,
-               obj_request, orig_request, result,
-               obj_request->xferred, obj_request->length);
-       rbd_obj_request_put(obj_request);
+static int rbd_img_fill_from_bvecs(struct rbd_img_request *img_req,
+                                  struct ceph_file_extent *img_extents,
+                                  u32 num_img_extents,
+                                  struct bio_vec *bvecs)
+{
+       struct ceph_bvec_iter it = {
+               .bvecs = bvecs,
+               .iter = { .bi_size = ceph_file_extents_bytes(img_extents,
+                                                            num_img_extents) },
+       };
 
-       /*
-        * If the overlap has become 0 (most likely because the
-        * image has been flattened) we need to re-submit the
-        * original request.
-        */
-       rbd_dev = orig_request->img_request->rbd_dev;
-       if (!rbd_dev->parent_overlap) {
-               rbd_obj_request_submit(orig_request);
-               return;
-       }
+       return __rbd_img_fill_from_bvecs(img_req, img_extents, num_img_extents,
+                                        &it);
+}
 
-       /*
-        * Our only purpose here is to determine whether the object
-        * exists, and we don't want to treat the non-existence as
-        * an error.  If something else comes back, transfer the
-        * error to the original request and complete it now.
-        */
-       if (!result) {
-               obj_request_existence_set(orig_request, true);
-       } else if (result == -ENOENT) {
-               obj_request_existence_set(orig_request, false);
-       } else {
-               goto fail_orig_request;
-       }
+static void rbd_img_request_submit(struct rbd_img_request *img_request)
+{
+       struct rbd_obj_request *obj_request;
 
-       /*
-        * Resubmit the original request now that we have recorded
-        * whether the target object exists.
-        */
-       result = rbd_img_obj_request_submit(orig_request);
-       if (result)
-               goto fail_orig_request;
+       dout("%s: img %p\n", __func__, img_request);
 
-       return;
+       rbd_img_request_get(img_request);
+       for_each_obj_request(img_request, obj_request)
+               rbd_obj_request_submit(obj_request);
 
-fail_orig_request:
-       rbd_obj_request_error(orig_request, result);
+       rbd_img_request_put(img_request);
 }
 
-static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
+static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req)
 {
-       struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
-       struct rbd_obj_request *stat_request;
-       struct page **pages;
-       u32 page_count;
-       size_t size;
+       struct rbd_img_request *img_req = obj_req->img_request;
+       struct rbd_img_request *child_img_req;
        int ret;
 
-       stat_request = rbd_obj_request_create(OBJ_REQUEST_PAGES);
-       if (!stat_request)
+       child_img_req = rbd_img_request_create(img_req->rbd_dev->parent,
+                                              OBJ_OP_READ, NULL);
+       if (!child_img_req)
                return -ENOMEM;
 
-       stat_request->object_no = obj_request->object_no;
+       __set_bit(IMG_REQ_CHILD, &child_img_req->flags);
+       child_img_req->obj_request = obj_req;
 
-       stat_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1,
-                                                  stat_request);
-       if (!stat_request->osd_req) {
-               ret = -ENOMEM;
-               goto fail_stat_request;
+       if (!rbd_img_is_write(img_req)) {
+               switch (img_req->data_type) {
+               case OBJ_REQUEST_BIO:
+                       ret = __rbd_img_fill_from_bio(child_img_req,
+                                                     obj_req->img_extents,
+                                                     obj_req->num_img_extents,
+                                                     &obj_req->bio_pos);
+                       break;
+               case OBJ_REQUEST_BVECS:
+               case OBJ_REQUEST_OWN_BVECS:
+                       ret = __rbd_img_fill_from_bvecs(child_img_req,
+                                                     obj_req->img_extents,
+                                                     obj_req->num_img_extents,
+                                                     &obj_req->bvec_pos);
+                       break;
+               default:
+                       rbd_assert(0);
+               }
+       } else {
+               ret = rbd_img_fill_from_bvecs(child_img_req,
+                                             obj_req->img_extents,
+                                             obj_req->num_img_extents,
+                                             obj_req->copyup_bvecs);
+       }
+       if (ret) {
+               rbd_img_request_put(child_img_req);
+               return ret;
+       }
+
+       rbd_img_request_submit(child_img_req);
+       return 0;
+}
+
+static bool rbd_obj_handle_read(struct rbd_obj_request *obj_req)
+{
+       struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+       int ret;
+
+       if (obj_req->result == -ENOENT &&
+           rbd_dev->parent_overlap && !obj_req->tried_parent) {
+               /* reverse map this object extent onto the parent */
+               ret = rbd_obj_calc_img_extents(obj_req, false);
+               if (ret) {
+                       obj_req->result = ret;
+                       return true;
+               }
+
+               if (obj_req->num_img_extents) {
+                       obj_req->tried_parent = true;
+                       ret = rbd_obj_read_from_parent(obj_req);
+                       if (ret) {
+                               obj_req->result = ret;
+                               return true;
+                       }
+                       return false;
+               }
        }
 
        /*
-        * The response data for a STAT call consists of:
-        *     le64 length;
-        *     struct {
-        *         le32 tv_sec;
-        *         le32 tv_nsec;
-        *     } mtime;
+        * -ENOENT means a hole in the image -- zero-fill the entire
+        * length of the request.  A short read also implies zero-fill
+        * to the end of the request.  In both cases we update xferred
+        * count to indicate the whole request was satisfied.
         */
-       size = sizeof (__le64) + sizeof (__le32) + sizeof (__le32);
-       page_count = (u32)calc_pages_for(0, size);
-       pages = ceph_alloc_page_vector(page_count, GFP_NOIO);
-       if (IS_ERR(pages)) {
-               ret = PTR_ERR(pages);
-               goto fail_stat_request;
+       if (obj_req->result == -ENOENT ||
+           (!obj_req->result && obj_req->xferred < obj_req->ex.oe_len)) {
+               rbd_assert(!obj_req->xferred || !obj_req->result);
+               rbd_obj_zero_range(obj_req, obj_req->xferred,
+                                  obj_req->ex.oe_len - obj_req->xferred);
+               obj_req->result = 0;
+               obj_req->xferred = obj_req->ex.oe_len;
        }
 
-       osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT, 0);
-       osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0,
-                                    false, false);
-
-       rbd_obj_request_get(obj_request);
-       stat_request->obj_request = obj_request;
-       stat_request->pages = pages;
-       stat_request->page_count = page_count;
-       stat_request->callback = rbd_img_obj_exists_callback;
+       return true;
+}
 
-       rbd_obj_request_submit(stat_request);
-       return 0;
+/*
+ * copyup_bvecs pages are never highmem pages
+ */
+static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes)
+{
+       struct ceph_bvec_iter it = {
+               .bvecs = bvecs,
+               .iter = { .bi_size = bytes },
+       };
 
-fail_stat_request:
-       rbd_obj_request_put(stat_request);
-       return ret;
+       ceph_bvec_iter_advance_step(&it, bytes, ({
+               if (memchr_inv(page_address(bv.bv_page) + bv.bv_offset, 0,
+                              bv.bv_len))
+                       return false;
+       }));
+       return true;
 }
 
-static bool img_obj_request_simple(struct rbd_obj_request *obj_request)
+static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
 {
-       struct rbd_img_request *img_request = obj_request->img_request;
-       struct rbd_device *rbd_dev = img_request->rbd_dev;
-
-       /* Reads */
-       if (!img_request_write_test(img_request) &&
-           !img_request_discard_test(img_request))
-               return true;
+       unsigned int num_osd_ops = obj_req->osd_req->r_num_ops;
 
-       /* Non-layered writes */
-       if (!img_request_layered_test(img_request))
-               return true;
+       dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes);
+       rbd_assert(obj_req->osd_req->r_ops[0].op == CEPH_OSD_OP_STAT);
+       rbd_osd_req_destroy(obj_req->osd_req);
 
        /*
-        * Layered writes outside of the parent overlap range don't
-        * share any data with the parent.
+        * Create a copyup request with the same number of OSD ops as
+        * the original request.  The original request was stat + op(s),
+        * the new copyup request will be copyup + the same op(s).
         */
-       if (!obj_request_overlaps_parent(obj_request))
-               return true;
+       obj_req->osd_req = rbd_osd_req_create(obj_req, num_osd_ops);
+       if (!obj_req->osd_req)
+               return -ENOMEM;
 
        /*
-        * Entire-object layered writes - we will overwrite whatever
-        * parent data there is anyway.
+        * Only send non-zero copyup data to save some I/O and network
+        * bandwidth -- zero copyup data is equivalent to the object not
+        * existing.
         */
-       if (!obj_request->offset &&
-           obj_request->length == rbd_obj_bytes(&rbd_dev->header))
-               return true;
+       if (is_zero_bvecs(obj_req->copyup_bvecs, bytes)) {
+               dout("%s obj_req %p detected zeroes\n", __func__, obj_req);
+               bytes = 0;
+       }
 
-       /*
-        * If the object is known to already exist, its parent data has
-        * already been copied.
-        */
-       if (obj_request_known_test(obj_request) &&
-           obj_request_exists_test(obj_request))
-               return true;
+       osd_req_op_cls_init(obj_req->osd_req, 0, CEPH_OSD_OP_CALL, "rbd",
+                           "copyup");
+       osd_req_op_cls_request_data_bvecs(obj_req->osd_req, 0,
+                                         obj_req->copyup_bvecs, bytes);
+
+       switch (obj_req->img_request->op_type) {
+       case OBJ_OP_WRITE:
+               __rbd_obj_setup_write(obj_req, 1);
+               break;
+       case OBJ_OP_DISCARD:
+               rbd_assert(!rbd_obj_is_entire(obj_req));
+               __rbd_obj_setup_discard(obj_req, 1);
+               break;
+       default:
+               rbd_assert(0);
+       }
 
-       return false;
+       rbd_obj_request_submit(obj_req);
+       return 0;
 }
 
-static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request)
+static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap)
 {
-       rbd_assert(obj_request_img_data_test(obj_request));
-       rbd_assert(obj_request_type_valid(obj_request->type));
-       rbd_assert(obj_request->img_request);
+       u32 i;
 
-       if (img_obj_request_simple(obj_request)) {
-               rbd_obj_request_submit(obj_request);
-               return 0;
-       }
+       rbd_assert(!obj_req->copyup_bvecs);
+       obj_req->copyup_bvec_count = calc_pages_for(0, obj_overlap);
+       obj_req->copyup_bvecs = kcalloc(obj_req->copyup_bvec_count,
+                                       sizeof(*obj_req->copyup_bvecs),
+                                       GFP_NOIO);
+       if (!obj_req->copyup_bvecs)
+               return -ENOMEM;
 
-       /*
-        * It's a layered write.  The target object might exist but
-        * we may not know that yet.  If we know it doesn't exist,
-        * start by reading the data for the full target object from
-        * the parent so we can use it for a copyup to the target.
-        */
-       if (obj_request_known_test(obj_request))
-               return rbd_img_obj_parent_read_full(obj_request);
+       for (i = 0; i < obj_req->copyup_bvec_count; i++) {
+               unsigned int len = min(obj_overlap, (u64)PAGE_SIZE);
+
+               obj_req->copyup_bvecs[i].bv_page = alloc_page(GFP_NOIO);
+               if (!obj_req->copyup_bvecs[i].bv_page)
+                       return -ENOMEM;
 
-       /* We don't know whether the target exists.  Go find out. */
+               obj_req->copyup_bvecs[i].bv_offset = 0;
+               obj_req->copyup_bvecs[i].bv_len = len;
+               obj_overlap -= len;
+       }
 
-       return rbd_img_obj_exists_submit(obj_request);
+       rbd_assert(!obj_overlap);
+       return 0;
 }
 
-static int rbd_img_request_submit(struct rbd_img_request *img_request)
+static int rbd_obj_handle_write_guard(struct rbd_obj_request *obj_req)
 {
-       struct rbd_obj_request *obj_request;
-       struct rbd_obj_request *next_obj_request;
-       int ret = 0;
-
-       dout("%s: img %p\n", __func__, img_request);
+       struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+       int ret;
 
-       rbd_img_request_get(img_request);
-       for_each_obj_request_safe(img_request, obj_request, next_obj_request) {
-               ret = rbd_img_obj_request_submit(obj_request);
-               if (ret)
-                       goto out_put_ireq;
+       rbd_assert(obj_req->num_img_extents);
+       prune_extents(obj_req->img_extents, &obj_req->num_img_extents,
+                     rbd_dev->parent_overlap);
+       if (!obj_req->num_img_extents) {
+               /*
+                * The overlap has become 0 (most likely because the
+                * image has been flattened).  Use rbd_obj_issue_copyup()
+                * to re-submit the original write request -- the copyup
+                * operation itself will be a no-op, since someone must
+                * have populated the child object while we weren't
+                * looking.  Move to WRITE_FLAT state as we'll be done
+                * with the operation once the null copyup completes.
+                */
+               obj_req->write_state = RBD_OBJ_WRITE_FLAT;
+               return rbd_obj_issue_copyup(obj_req, 0);
        }
 
-out_put_ireq:
-       rbd_img_request_put(img_request);
-       return ret;
+       ret = setup_copyup_bvecs(obj_req, rbd_obj_img_extents_bytes(obj_req));
+       if (ret)
+               return ret;
+
+       obj_req->write_state = RBD_OBJ_WRITE_COPYUP;
+       return rbd_obj_read_from_parent(obj_req);
 }
 
-static void rbd_img_parent_read_callback(struct rbd_img_request *img_request)
+static bool rbd_obj_handle_write(struct rbd_obj_request *obj_req)
 {
-       struct rbd_obj_request *obj_request;
-       struct rbd_device *rbd_dev;
-       u64 obj_end;
-       u64 img_xferred;
-       int img_result;
+       int ret;
 
-       rbd_assert(img_request_child_test(img_request));
+again:
+       switch (obj_req->write_state) {
+       case RBD_OBJ_WRITE_GUARD:
+               rbd_assert(!obj_req->xferred);
+               if (obj_req->result == -ENOENT) {
+                       /*
+                        * The target object doesn't exist.  Read the data for
+                        * the entire target object up to the overlap point (if
+                        * any) from the parent, so we can use it for a copyup.
+                        */
+                       ret = rbd_obj_handle_write_guard(obj_req);
+                       if (ret) {
+                               obj_req->result = ret;
+                               return true;
+                       }
+                       return false;
+               }
+               /* fall through */
+       case RBD_OBJ_WRITE_FLAT:
+               if (!obj_req->result)
+                       /*
+                        * There is no such thing as a successful short
+                        * write -- indicate the whole request was satisfied.
+                        */
+                       obj_req->xferred = obj_req->ex.oe_len;
+               return true;
+       case RBD_OBJ_WRITE_COPYUP:
+               obj_req->write_state = RBD_OBJ_WRITE_GUARD;
+               if (obj_req->result)
+                       goto again;
 
-       /* First get what we need from the image request and release it */
+               rbd_assert(obj_req->xferred);
+               ret = rbd_obj_issue_copyup(obj_req, obj_req->xferred);
+               if (ret) {
+                       obj_req->result = ret;
+                       return true;
+               }
+               return false;
+       default:
+               rbd_assert(0);
+       }
+}
 
-       obj_request = img_request->obj_request;
-       img_xferred = img_request->xferred;
-       img_result = img_request->result;
-       rbd_img_request_put(img_request);
+/*
+ * Returns true if @obj_req is completed, or false otherwise.
+ */
+static bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req)
+{
+       switch (obj_req->img_request->op_type) {
+       case OBJ_OP_READ:
+               return rbd_obj_handle_read(obj_req);
+       case OBJ_OP_WRITE:
+               return rbd_obj_handle_write(obj_req);
+       case OBJ_OP_DISCARD:
+               if (rbd_obj_handle_write(obj_req)) {
+                       /*
+                        * Hide -ENOENT from delete/truncate/zero -- discarding
+                        * a non-existent object is not a problem.
+                        */
+                       if (obj_req->result == -ENOENT) {
+                               obj_req->result = 0;
+                               obj_req->xferred = obj_req->ex.oe_len;
+                       }
+                       return true;
+               }
+               return false;
+       default:
+               rbd_assert(0);
+       }
+}
 
-       /*
-        * If the overlap has become 0 (most likely because the
-        * image has been flattened) we need to re-submit the
-        * original request.
-        */
-       rbd_assert(obj_request);
-       rbd_assert(obj_request->img_request);
-       rbd_dev = obj_request->img_request->rbd_dev;
-       if (!rbd_dev->parent_overlap) {
-               rbd_obj_request_submit(obj_request);
+static void rbd_obj_end_request(struct rbd_obj_request *obj_req)
+{
+       struct rbd_img_request *img_req = obj_req->img_request;
+
+       rbd_assert((!obj_req->result &&
+                   obj_req->xferred == obj_req->ex.oe_len) ||
+                  (obj_req->result < 0 && !obj_req->xferred));
+       if (!obj_req->result) {
+               img_req->xferred += obj_req->xferred;
                return;
        }
 
-       obj_request->result = img_result;
-       if (obj_request->result)
-               goto out;
+       rbd_warn(img_req->rbd_dev,
+                "%s at objno %llu %llu~%llu result %d xferred %llu",
+                obj_op_name(img_req->op_type), obj_req->ex.oe_objno,
+                obj_req->ex.oe_off, obj_req->ex.oe_len, obj_req->result,
+                obj_req->xferred);
+       if (!img_req->result) {
+               img_req->result = obj_req->result;
+               img_req->xferred = 0;
+       }
+}
 
-       /*
-        * We need to zero anything beyond the parent overlap
-        * boundary.  Since rbd_img_obj_request_read_callback()
-        * will zero anything beyond the end of a short read, an
-        * easy way to do this is to pretend the data from the
-        * parent came up short--ending at the overlap boundary.
-        */
-       rbd_assert(obj_request->img_offset < U64_MAX - obj_request->length);
-       obj_end = obj_request->img_offset + obj_request->length;
-       if (obj_end > rbd_dev->parent_overlap) {
-               u64 xferred = 0;
+static void rbd_img_end_child_request(struct rbd_img_request *img_req)
+{
+       struct rbd_obj_request *obj_req = img_req->obj_request;
 
-               if (obj_request->img_offset < rbd_dev->parent_overlap)
-                       xferred = rbd_dev->parent_overlap -
-                                       obj_request->img_offset;
+       rbd_assert(test_bit(IMG_REQ_CHILD, &img_req->flags));
+       rbd_assert((!img_req->result &&
+                   img_req->xferred == rbd_obj_img_extents_bytes(obj_req)) ||
+                  (img_req->result < 0 && !img_req->xferred));
 
-               obj_request->xferred = min(img_xferred, xferred);
-       } else {
-               obj_request->xferred = img_xferred;
-       }
-out:
-       rbd_img_obj_request_read_callback(obj_request);
-       rbd_obj_request_complete(obj_request);
+       obj_req->result = img_req->result;
+       obj_req->xferred = img_req->xferred;
+       rbd_img_request_put(img_req);
 }
 
-static void rbd_img_parent_read(struct rbd_obj_request *obj_request)
+static void rbd_img_end_request(struct rbd_img_request *img_req)
 {
-       struct rbd_img_request *img_request;
-       int result;
+       rbd_assert(!test_bit(IMG_REQ_CHILD, &img_req->flags));
+       rbd_assert((!img_req->result &&
+                   img_req->xferred == blk_rq_bytes(img_req->rq)) ||
+                  (img_req->result < 0 && !img_req->xferred));
 
-       rbd_assert(obj_request_img_data_test(obj_request));
-       rbd_assert(obj_request->img_request != NULL);
-       rbd_assert(obj_request->result == (s32) -ENOENT);
-       rbd_assert(obj_request_type_valid(obj_request->type));
+       blk_mq_end_request(img_req->rq,
+                          errno_to_blk_status(img_req->result));
+       rbd_img_request_put(img_req);
+}
 
-       /* rbd_read_finish(obj_request, obj_request->length); */
-       img_request = rbd_parent_request_create(obj_request,
-                                               obj_request->img_offset,
-                                               obj_request->length);
-       result = -ENOMEM;
-       if (!img_request)
-               goto out_err;
+static void rbd_obj_handle_request(struct rbd_obj_request *obj_req)
+{
+       struct rbd_img_request *img_req;
 
-       if (obj_request->type == OBJ_REQUEST_BIO)
-               result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
-                                               obj_request->bio_list);
-       else
-               result = rbd_img_request_fill(img_request, OBJ_REQUEST_PAGES,
-                                               obj_request->pages);
-       if (result)
-               goto out_err;
+again:
+       if (!__rbd_obj_handle_request(obj_req))
+               return;
 
-       img_request->callback = rbd_img_parent_read_callback;
-       result = rbd_img_request_submit(img_request);
-       if (result)
-               goto out_err;
+       img_req = obj_req->img_request;
+       spin_lock(&img_req->completion_lock);
+       rbd_obj_end_request(obj_req);
+       rbd_assert(img_req->pending_count);
+       if (--img_req->pending_count) {
+               spin_unlock(&img_req->completion_lock);
+               return;
+       }
 
-       return;
-out_err:
-       if (img_request)
-               rbd_img_request_put(img_request);
-       obj_request->result = result;
-       obj_request->xferred = 0;
-       obj_request_done_set(obj_request);
+       spin_unlock(&img_req->completion_lock);
+       if (test_bit(IMG_REQ_CHILD, &img_req->flags)) {
+               obj_req = img_req->obj_request;
+               rbd_img_end_child_request(img_req);
+               goto again;
+       }
+       rbd_img_end_request(img_req);
 }
 
 static const struct rbd_client_id rbd_empty_cid;
@@ -3091,8 +2674,8 @@ static int __rbd_notify_op_lock(struct rbd_device *rbd_dev,
 {
        struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
        struct rbd_client_id cid = rbd_get_cid(rbd_dev);
-       int buf_size = 4 + 8 + 8 + CEPH_ENCODING_START_BLK_LEN;
-       char buf[buf_size];
+       char buf[4 + 8 + 8 + CEPH_ENCODING_START_BLK_LEN];
+       int buf_size = sizeof(buf);
        void *p = buf;
 
        dout("%s rbd_dev %p notify_op %d\n", __func__, rbd_dev, notify_op);
@@ -3610,8 +3193,8 @@ static void __rbd_acknowledge_notify(struct rbd_device *rbd_dev,
                                     u64 notify_id, u64 cookie, s32 *result)
 {
        struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
-       int buf_size = 4 + CEPH_ENCODING_START_BLK_LEN;
-       char buf[buf_size];
+       char buf[4 + CEPH_ENCODING_START_BLK_LEN];
+       int buf_size = sizeof(buf);
        int ret;
 
        if (result) {
@@ -3887,7 +3470,7 @@ static void rbd_reregister_watch(struct work_struct *work)
 
        ret = rbd_dev_refresh(rbd_dev);
        if (ret)
-               rbd_warn(rbd_dev, "reregisteration refresh failed: %d", ret);
+               rbd_warn(rbd_dev, "reregistration refresh failed: %d", ret);
 }
 
 /*
@@ -4070,8 +3653,7 @@ static void rbd_queue_workfn(struct work_struct *work)
                }
        }
 
-       img_request = rbd_img_request_create(rbd_dev, offset, length, op_type,
-                                            snapc);
+       img_request = rbd_img_request_create(rbd_dev, op_type, snapc);
        if (!img_request) {
                result = -ENOMEM;
                goto err_unlock;
@@ -4080,18 +3662,14 @@ static void rbd_queue_workfn(struct work_struct *work)
        snapc = NULL; /* img_request consumes a ref */
 
        if (op_type == OBJ_OP_DISCARD)
-               result = rbd_img_request_fill(img_request, OBJ_REQUEST_NODATA,
-                                             NULL);
+               result = rbd_img_fill_nodata(img_request, offset, length);
        else
-               result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
-                                             rq->bio);
-       if (result)
-               goto err_img_request;
-
-       result = rbd_img_request_submit(img_request);
+               result = rbd_img_fill_from_bio(img_request, offset, length,
+                                              rq->bio);
        if (result)
                goto err_img_request;
 
+       rbd_img_request_submit(img_request);
        if (must_be_locked)
                up_read(&rbd_dev->lock_rwsem);
        return;
@@ -4369,7 +3947,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
        blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
        q->limits.max_sectors = queue_max_hw_sectors(q);
        blk_queue_max_segments(q, USHRT_MAX);
-       blk_queue_max_segment_size(q, segment_size);
+       blk_queue_max_segment_size(q, UINT_MAX);
        blk_queue_io_min(q, segment_size);
        blk_queue_io_opt(q, segment_size);
 
@@ -5057,9 +4635,6 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
        } __attribute__ ((packed)) striping_info_buf = { 0 };
        size_t size = sizeof (striping_info_buf);
        void *p;
-       u64 obj_size;
-       u64 stripe_unit;
-       u64 stripe_count;
        int ret;
 
        ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
@@ -5071,31 +4646,9 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
        if (ret < size)
                return -ERANGE;
 
-       /*
-        * We don't actually support the "fancy striping" feature
-        * (STRIPINGV2) yet, but if the striping sizes are the
-        * defaults the behavior is the same as before.  So find
-        * out, and only fail if the image has non-default values.
-        */
-       ret = -EINVAL;
-       obj_size = rbd_obj_bytes(&rbd_dev->header);
        p = &striping_info_buf;
-       stripe_unit = ceph_decode_64(&p);
-       if (stripe_unit != obj_size) {
-               rbd_warn(rbd_dev, "unsupported stripe unit "
-                               "(got %llu want %llu)",
-                               stripe_unit, obj_size);
-               return -EINVAL;
-       }
-       stripe_count = ceph_decode_64(&p);
-       if (stripe_count != 1) {
-               rbd_warn(rbd_dev, "unsupported stripe count "
-                               "(got %llu want 1)", stripe_count);
-               return -EINVAL;
-       }
-       rbd_dev->header.stripe_unit = stripe_unit;
-       rbd_dev->header.stripe_count = stripe_count;
-
+       rbd_dev->header.stripe_unit = ceph_decode_64(&p);
+       rbd_dev->header.stripe_count = ceph_decode_64(&p);
        return 0;
 }
 
@@ -5653,39 +5206,6 @@ out_err:
        return ret;
 }
 
-/*
- * Return pool id (>= 0) or a negative error code.
- */
-static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
-{
-       struct ceph_options *opts = rbdc->client->options;
-       u64 newest_epoch;
-       int tries = 0;
-       int ret;
-
-again:
-       ret = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, pool_name);
-       if (ret == -ENOENT && tries++ < 1) {
-               ret = ceph_monc_get_version(&rbdc->client->monc, "osdmap",
-                                           &newest_epoch);
-               if (ret < 0)
-                       return ret;
-
-               if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
-                       ceph_osdc_maybe_request_map(&rbdc->client->osdc);
-                       (void) ceph_monc_wait_osdmap(&rbdc->client->monc,
-                                                    newest_epoch,
-                                                    opts->mount_timeout);
-                       goto again;
-               } else {
-                       /* the osdmap we have is new enough */
-                       return -ENOENT;
-               }
-       }
-
-       return ret;
-}
-
 static void rbd_dev_image_unlock(struct rbd_device *rbd_dev)
 {
        down_write(&rbd_dev->lock_rwsem);
@@ -6114,7 +5634,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
        }
 
        /* pick the pool */
-       rc = rbd_add_get_pool_id(rbdc, spec->pool_name);
+       rc = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, spec->pool_name);
        if (rc < 0) {
                if (rc == -ENOENT)
                        pr_info("pool %s does not exist\n", spec->pool_name);
@@ -6366,16 +5886,8 @@ static int rbd_slab_init(void)
        if (!rbd_obj_request_cache)
                goto out_err;
 
-       rbd_assert(!rbd_bio_clone);
-       rbd_bio_clone = bioset_create(BIO_POOL_SIZE, 0, 0);
-       if (!rbd_bio_clone)
-               goto out_err_clone;
-
        return 0;
 
-out_err_clone:
-       kmem_cache_destroy(rbd_obj_request_cache);
-       rbd_obj_request_cache = NULL;
 out_err:
        kmem_cache_destroy(rbd_img_request_cache);
        rbd_img_request_cache = NULL;
@@ -6391,10 +5903,6 @@ static void rbd_slab_exit(void)
        rbd_assert(rbd_img_request_cache);
        kmem_cache_destroy(rbd_img_request_cache);
        rbd_img_request_cache = NULL;
-
-       rbd_assert(rbd_bio_clone);
-       bioset_free(rbd_bio_clone);
-       rbd_bio_clone = NULL;
 }
 
 static int __init rbd_init(void)
index 0c858d027bf3d606790900190415c78bf3db07be..57dc546628b54d0659d65f4d6f00bb38a8be0efd 100644 (file)
@@ -809,89 +809,6 @@ static __poll_t rtc_poll(struct file *file, poll_table *wait)
 }
 #endif
 
-int rtc_register(rtc_task_t *task)
-{
-#ifndef RTC_IRQ
-       return -EIO;
-#else
-       if (task == NULL || task->func == NULL)
-               return -EINVAL;
-       spin_lock_irq(&rtc_lock);
-       if (rtc_status & RTC_IS_OPEN) {
-               spin_unlock_irq(&rtc_lock);
-               return -EBUSY;
-       }
-       spin_lock(&rtc_task_lock);
-       if (rtc_callback) {
-               spin_unlock(&rtc_task_lock);
-               spin_unlock_irq(&rtc_lock);
-               return -EBUSY;
-       }
-       rtc_status |= RTC_IS_OPEN;
-       rtc_callback = task;
-       spin_unlock(&rtc_task_lock);
-       spin_unlock_irq(&rtc_lock);
-       return 0;
-#endif
-}
-EXPORT_SYMBOL(rtc_register);
-
-int rtc_unregister(rtc_task_t *task)
-{
-#ifndef RTC_IRQ
-       return -EIO;
-#else
-       unsigned char tmp;
-
-       spin_lock_irq(&rtc_lock);
-       spin_lock(&rtc_task_lock);
-       if (rtc_callback != task) {
-               spin_unlock(&rtc_task_lock);
-               spin_unlock_irq(&rtc_lock);
-               return -ENXIO;
-       }
-       rtc_callback = NULL;
-
-       /* disable controls */
-       if (!hpet_mask_rtc_irq_bit(RTC_PIE | RTC_AIE | RTC_UIE)) {
-               tmp = CMOS_READ(RTC_CONTROL);
-               tmp &= ~RTC_PIE;
-               tmp &= ~RTC_AIE;
-               tmp &= ~RTC_UIE;
-               CMOS_WRITE(tmp, RTC_CONTROL);
-               CMOS_READ(RTC_INTR_FLAGS);
-       }
-       if (rtc_status & RTC_TIMER_ON) {
-               rtc_status &= ~RTC_TIMER_ON;
-               del_timer(&rtc_irq_timer);
-       }
-       rtc_status &= ~RTC_IS_OPEN;
-       spin_unlock(&rtc_task_lock);
-       spin_unlock_irq(&rtc_lock);
-       return 0;
-#endif
-}
-EXPORT_SYMBOL(rtc_unregister);
-
-int rtc_control(rtc_task_t *task, unsigned int cmd, unsigned long arg)
-{
-#ifndef RTC_IRQ
-       return -EIO;
-#else
-       unsigned long flags;
-       if (cmd != RTC_PIE_ON && cmd != RTC_PIE_OFF && cmd != RTC_IRQP_SET)
-               return -EINVAL;
-       spin_lock_irqsave(&rtc_task_lock, flags);
-       if (rtc_callback != task) {
-               spin_unlock_irqrestore(&rtc_task_lock, flags);
-               return -ENXIO;
-       }
-       spin_unlock_irqrestore(&rtc_task_lock, flags);
-       return rtc_do_ioctl(cmd, arg, 1);
-#endif
-}
-EXPORT_SYMBOL(rtc_control);
-
 /*
  *     The various file operations we support.
  */
index c6ebc88a7d8d93148dc3a94c281a5233a2b3501a..72a2975499dbaa605b84bea98cea24672d2d2c09 100644 (file)
@@ -202,6 +202,7 @@ static int __init armada37xx_cpufreq_driver_init(void)
        cur_frequency = clk_get_rate(clk);
        if (!cur_frequency) {
                dev_err(cpu_dev, "Failed to get clock rate for CPU\n");
+               clk_put(clk);
                return -EINVAL;
        }
 
@@ -210,6 +211,7 @@ static int __init armada37xx_cpufreq_driver_init(void)
                return -EINVAL;
 
        armada37xx_cpufreq_dvfs_setup(nb_pm_base, clk, dvfs->divider);
+       clk_put(clk);
 
        for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR;
             load_lvl++) {
index 8300a9fcb80c2ec41e0c79e09430a6d40cdd2a57..bc5fc163087607998bd594db39985dadf7f49a01 100644 (file)
@@ -162,14 +162,23 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
                cpu->perf_caps.highest_perf;
        policy->cpuinfo.max_freq = cppc_dmi_max_khz;
 
-       policy->cpuinfo.transition_latency = cppc_get_transition_latency(cpu_num);
        policy->transition_delay_us = cppc_get_transition_latency(cpu_num) /
                NSEC_PER_USEC;
        policy->shared_type = cpu->shared_type;
 
-       if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+       if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
+               int i;
+
                cpumask_copy(policy->cpus, cpu->shared_cpu_map);
-       else if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL) {
+
+               for_each_cpu(i, policy->cpus) {
+                       if (unlikely(i == policy->cpu))
+                               continue;
+
+                       memcpy(&all_cpu_data[i]->perf_caps, &cpu->perf_caps,
+                              sizeof(cpu->perf_caps));
+               }
+       } else if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL) {
                /* Support only SW_ANY for now. */
                pr_debug("Unsupported CPU co-ord type\n");
                return -EFAULT;
index 10e119ae66dd25e5cb88331238d8029cc8138f99..3a8cc99e6815fa4c1597e9cd5f2367bcc1d40d6c 100644 (file)
@@ -352,20 +352,6 @@ static int set_freq_table_sorted(struct cpufreq_policy *policy)
        return 0;
 }
 
-int cpufreq_table_validate_and_show(struct cpufreq_policy *policy,
-                                     struct cpufreq_frequency_table *table)
-{
-       int ret;
-
-       ret = cpufreq_frequency_table_cpuinfo(policy, table);
-       if (ret)
-               return ret;
-
-       policy->freq_table = table;
-       return 0;
-}
-EXPORT_SYMBOL_GPL(cpufreq_table_validate_and_show);
-
 int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy)
 {
        int ret;
index 6d084c61ee2532e44d05df2dcc57bdb32d0d793f..17e566afbb41c3da0a720a56905972dffb229ec6 100644 (file)
@@ -26,7 +26,6 @@
 #include <linux/sysfs.h>
 #include <linux/types.h>
 #include <linux/fs.h>
-#include <linux/debugfs.h>
 #include <linux/acpi.h>
 #include <linux/vmalloc.h>
 #include <trace/events/power.h>
index 959a1dbe3835ded7cde2affea4b651796fb28fa3..b4dbc77459b6b58fba1b197609337eb85b36a31a 100644 (file)
@@ -159,13 +159,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy)
        priv->domain_id = handle->perf_ops->device_domain_id(cpu_dev);
 
        policy->driver_data = priv;
-
-       ret = cpufreq_table_validate_and_show(policy, freq_table);
-       if (ret) {
-               dev_err(cpu_dev, "%s: invalid frequency table: %d\n", __func__,
-                       ret);
-               goto out_free_cpufreq_table;
-       }
+       policy->freq_table = freq_table;
 
        /* SCMI allows DVFS request for any domain from any CPU */
        policy->dvfs_possible_from_any_cpu = true;
@@ -179,8 +173,6 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy)
        policy->fast_switch_possible = true;
        return 0;
 
-out_free_cpufreq_table:
-       dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table);
 out_free_priv:
        kfree(priv);
 out_free_opp:
index a099b7bf74cd9ff741afc21ff0417958280784c3..6ba709b6f0950ee3665f718448dbf97710eb039d 100644 (file)
@@ -304,7 +304,7 @@ static struct platform_driver ti_cpufreq_driver = {
                .name = "ti-cpufreq",
        },
 };
-module_platform_driver(ti_cpufreq_driver);
+builtin_platform_driver(ti_cpufreq_driver);
 
 MODULE_DESCRIPTION("TI CPUFreq/OPP hw-supported driver");
 MODULE_AUTHOR("Dave Gerlach <d-gerlach@ti.com>");
index 0003e9a02637f1ded004d6465b4eaaeef5817821..6df894d65d9e270efe800ff0c371d8c775183f08 100644 (file)
@@ -272,12 +272,18 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
  *
  * @drv: the cpuidle driver
  * @dev: the cpuidle device
+ * @stop_tick: indication on whether or not to stop the tick
  *
  * Returns the index of the idle state.  The return value must not be negative.
+ *
+ * The memory location pointed to by @stop_tick is expected to be written the
+ * 'false' boolean value if the scheduler tick should not be stopped before
+ * entering the returned state.
  */
-int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+                  bool *stop_tick)
 {
-       return cpuidle_curr_governor->select(drv, dev);
+       return cpuidle_curr_governor->select(drv, dev, stop_tick);
 }
 
 /**
index 1ad8745fd6d648321bcae1323c33f2ae549afcce..b24883f85c99543089d2035bfdf6226f589edb1b 100644 (file)
@@ -63,9 +63,10 @@ static inline void ladder_do_selection(struct ladder_device *ldev,
  * ladder_select_state - selects the next state to enter
  * @drv: cpuidle driver
  * @dev: the CPU
+ * @dummy: not used
  */
 static int ladder_select_state(struct cpuidle_driver *drv,
-                               struct cpuidle_device *dev)
+                              struct cpuidle_device *dev, bool *dummy)
 {
        struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
        struct device *device = get_cpu_device(dev->cpu);
index aa390404e85f132705e4ca80506d15d292469c7e..1bfe03ceb236c3cee8df46fc284f1cc17e30c9ec 100644 (file)
@@ -123,6 +123,7 @@
 struct menu_device {
        int             last_state_idx;
        int             needs_update;
+       int             tick_wakeup;
 
        unsigned int    next_timer_us;
        unsigned int    predicted_us;
@@ -279,8 +280,10 @@ again:
  * menu_select - selects the next idle state to enter
  * @drv: cpuidle driver containing state data
  * @dev: the CPU
+ * @stop_tick: indication on whether or not to stop the tick
  */
-static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+                      bool *stop_tick)
 {
        struct menu_device *data = this_cpu_ptr(&menu_devices);
        struct device *device = get_cpu_device(dev->cpu);
@@ -292,6 +295,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
        unsigned int expected_interval;
        unsigned long nr_iowaiters, cpu_load;
        int resume_latency = dev_pm_qos_raw_read_value(device);
+       ktime_t delta_next;
 
        if (data->needs_update) {
                menu_update(drv, dev);
@@ -303,11 +307,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
                latency_req = resume_latency;
 
        /* Special case when user has set very strict latency requirement */
-       if (unlikely(latency_req == 0))
+       if (unlikely(latency_req == 0)) {
+               *stop_tick = false;
                return 0;
+       }
 
        /* determine the expected residency time, round up */
-       data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length());
+       data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next));
 
        get_iowait_load(&nr_iowaiters, &cpu_load);
        data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
@@ -346,14 +352,30 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
         */
        data->predicted_us = min(data->predicted_us, expected_interval);
 
-       /*
-        * Use the performance multiplier and the user-configurable
-        * latency_req to determine the maximum exit latency.
-        */
-       interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
-       if (latency_req > interactivity_req)
-               latency_req = interactivity_req;
+       if (tick_nohz_tick_stopped()) {
+               /*
+                * If the tick is already stopped, the cost of possible short
+                * idle duration misprediction is much higher, because the CPU
+                * may be stuck in a shallow idle state for a long time as a
+                * result of it.  In that case say we might mispredict and try
+                * to force the CPU into a state for which we would have stopped
+                * the tick, unless a timer is going to expire really soon
+                * anyway.
+                */
+               if (data->predicted_us < TICK_USEC)
+                       data->predicted_us = min_t(unsigned int, TICK_USEC,
+                                                  ktime_to_us(delta_next));
+       } else {
+               /*
+                * Use the performance multiplier and the user-configurable
+                * latency_req to determine the maximum exit latency.
+                */
+               interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
+               if (latency_req > interactivity_req)
+                       latency_req = interactivity_req;
+       }
 
+       expected_interval = data->predicted_us;
        /*
         * Find the idle state with the lowest power while satisfying
         * our constraints.
@@ -369,15 +391,52 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
                        idx = i; /* first enabled state */
                if (s->target_residency > data->predicted_us)
                        break;
-               if (s->exit_latency > latency_req)
+               if (s->exit_latency > latency_req) {
+                       /*
+                        * If we break out of the loop for latency reasons, use
+                        * the target residency of the selected state as the
+                        * expected idle duration so that the tick is retained
+                        * as long as that target residency is low enough.
+                        */
+                       expected_interval = drv->states[idx].target_residency;
                        break;
-
+               }
                idx = i;
        }
 
        if (idx == -1)
                idx = 0; /* No states enabled. Must use 0. */
 
+       /*
+        * Don't stop the tick if the selected state is a polling one or if the
+        * expected idle duration is shorter than the tick period length.
+        */
+       if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
+           expected_interval < TICK_USEC) {
+               unsigned int delta_next_us = ktime_to_us(delta_next);
+
+               *stop_tick = false;
+
+               if (!tick_nohz_tick_stopped() && idx > 0 &&
+                   drv->states[idx].target_residency > delta_next_us) {
+                       /*
+                        * The tick is not going to be stopped and the target
+                        * residency of the state to be returned is not within
+                        * the time until the next timer event including the
+                        * tick, so try to correct that.
+                        */
+                       for (i = idx - 1; i >= 0; i--) {
+                           if (drv->states[i].disabled ||
+                               dev->states_usage[i].disable)
+                                       continue;
+
+                               idx = i;
+                               if (drv->states[i].target_residency <= delta_next_us)
+                                       break;
+                       }
+               }
+       }
+
        data->last_state_idx = idx;
 
        return data->last_state_idx;
@@ -397,6 +456,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index)
 
        data->last_state_idx = index;
        data->needs_update = 1;
+       data->tick_wakeup = tick_nohz_idle_got_tick();
 }
 
 /**
@@ -427,14 +487,27 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
         * assume the state was never reached and the exit latency is 0.
         */
 
-       /* measured value */
-       measured_us = cpuidle_get_last_residency(dev);
-
-       /* Deduct exit latency */
-       if (measured_us > 2 * target->exit_latency)
-               measured_us -= target->exit_latency;
-       else
-               measured_us /= 2;
+       if (data->tick_wakeup && data->next_timer_us > TICK_USEC) {
+               /*
+                * The nohz code said that there wouldn't be any events within
+                * the tick boundary (if the tick was stopped), but the idle
+                * duration predictor had a differing opinion.  Since the CPU
+                * was woken up by a tick (that wasn't stopped after all), the
+                * predictor was not quite right, so assume that the CPU could
+                * have been idle long (but not forever) to help the idle
+                * duration predictor do a better job next time.
+                */
+               measured_us = 9 * MAX_INTERESTING / 10;
+       } else {
+               /* measured value */
+               measured_us = cpuidle_get_last_residency(dev);
+
+               /* Deduct exit latency */
+               if (measured_us > 2 * target->exit_latency)
+                       measured_us -= target->exit_latency;
+               else
+                       measured_us /= 2;
+       }
 
        /* Make sure our coefficients do not exceed unity */
        if (measured_us > data->next_timer_us)
index b79aa8f7a49714441fe5cb204f45a48fe4c6034f..e0700bf4893a31e4e4f148ae26e95b509c8b6cbd 100644 (file)
@@ -1,3 +1,7 @@
+config DAX_DRIVER
+       select DAX
+       bool
+
 menuconfig DAX
        tristate "DAX: direct access to differentiated memory"
        select SRCU
@@ -16,7 +20,6 @@ config DEV_DAX
          baseline memory pool.  Mappings of a /dev/daxX.Y device impose
          restrictions that make the mapping behavior deterministic.
 
-
 config DEV_DAX_PMEM
        tristate "PMEM DAX: direct access to persistent memory"
        depends on LIBNVDIMM && NVDIMM_DAX && DEV_DAX
index 0b61f48f21a690147d029b775333b7a17f23ed3a..be8606457f27f02dc046ad1e8f02841742f8e4b4 100644 (file)
@@ -257,8 +257,8 @@ static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
        dax_region = dev_dax->region;
        if (dax_region->align > PAGE_SIZE) {
-               dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n",
-                       __func__, dax_region->align, fault_size);
+               dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
+                       dax_region->align, fault_size);
                return VM_FAULT_SIGBUS;
        }
 
@@ -267,8 +267,7 @@ static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
        phys = dax_pgoff_to_phys(dev_dax, vmf->pgoff, PAGE_SIZE);
        if (phys == -1) {
-               dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
-                               vmf->pgoff);
+               dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", vmf->pgoff);
                return VM_FAULT_SIGBUS;
        }
 
@@ -299,14 +298,14 @@ static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
        dax_region = dev_dax->region;
        if (dax_region->align > PMD_SIZE) {
-               dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n",
-                       __func__, dax_region->align, fault_size);
+               dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
+                       dax_region->align, fault_size);
                return VM_FAULT_SIGBUS;
        }
 
        /* dax pmd mappings require pfn_t_devmap() */
        if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
-               dev_dbg(dev, "%s: region lacks devmap flags\n", __func__);
+               dev_dbg(dev, "region lacks devmap flags\n");
                return VM_FAULT_SIGBUS;
        }
 
@@ -323,8 +322,7 @@ static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
        pgoff = linear_page_index(vmf->vma, pmd_addr);
        phys = dax_pgoff_to_phys(dev_dax, pgoff, PMD_SIZE);
        if (phys == -1) {
-               dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
-                               pgoff);
+               dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff);
                return VM_FAULT_SIGBUS;
        }
 
@@ -351,14 +349,14 @@ static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
        dax_region = dev_dax->region;
        if (dax_region->align > PUD_SIZE) {
-               dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n",
-                       __func__, dax_region->align, fault_size);
+               dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
+                       dax_region->align, fault_size);
                return VM_FAULT_SIGBUS;
        }
 
        /* dax pud mappings require pfn_t_devmap() */
        if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
-               dev_dbg(dev, "%s: region lacks devmap flags\n", __func__);
+               dev_dbg(dev, "region lacks devmap flags\n");
                return VM_FAULT_SIGBUS;
        }
 
@@ -375,8 +373,7 @@ static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
        pgoff = linear_page_index(vmf->vma, pud_addr);
        phys = dax_pgoff_to_phys(dev_dax, pgoff, PUD_SIZE);
        if (phys == -1) {
-               dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
-                               pgoff);
+               dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff);
                return VM_FAULT_SIGBUS;
        }
 
@@ -399,9 +396,8 @@ static int dev_dax_huge_fault(struct vm_fault *vmf,
        struct file *filp = vmf->vma->vm_file;
        struct dev_dax *dev_dax = filp->private_data;
 
-       dev_dbg(&dev_dax->dev, "%s: %s: %s (%#lx - %#lx) size = %d\n", __func__,
-                       current->comm, (vmf->flags & FAULT_FLAG_WRITE)
-                       ? "write" : "read",
+       dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
+                       (vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
                        vmf->vma->vm_start, vmf->vma->vm_end, pe_size);
 
        id = dax_read_lock();
@@ -460,7 +456,7 @@ static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
        struct dev_dax *dev_dax = filp->private_data;
        int rc, id;
 
-       dev_dbg(&dev_dax->dev, "%s\n", __func__);
+       dev_dbg(&dev_dax->dev, "trace\n");
 
        /*
         * We lock to check dax_dev liveness and will re-check at
@@ -518,7 +514,7 @@ static int dax_open(struct inode *inode, struct file *filp)
        struct inode *__dax_inode = dax_inode(dax_dev);
        struct dev_dax *dev_dax = dax_get_private(dax_dev);
 
-       dev_dbg(&dev_dax->dev, "%s\n", __func__);
+       dev_dbg(&dev_dax->dev, "trace\n");
        inode->i_mapping = __dax_inode->i_mapping;
        inode->i_mapping->host = __dax_inode;
        filp->f_mapping = inode->i_mapping;
@@ -533,7 +529,7 @@ static int dax_release(struct inode *inode, struct file *filp)
 {
        struct dev_dax *dev_dax = filp->private_data;
 
-       dev_dbg(&dev_dax->dev, "%s\n", __func__);
+       dev_dbg(&dev_dax->dev, "trace\n");
        return 0;
 }
 
@@ -575,7 +571,7 @@ static void unregister_dev_dax(void *dev)
        struct inode *inode = dax_inode(dax_dev);
        struct cdev *cdev = inode->i_cdev;
 
-       dev_dbg(dev, "%s\n", __func__);
+       dev_dbg(dev, "trace\n");
 
        kill_dev_dax(dev_dax);
        cdev_device_del(cdev, dev);
index 31b6ecce4c64205f1109532f222b39b980e75a00..fd49b24fd6afd344335e38438fce9dd981d928c9 100644 (file)
@@ -34,7 +34,7 @@ static void dax_pmem_percpu_release(struct percpu_ref *ref)
 {
        struct dax_pmem *dax_pmem = to_dax_pmem(ref);
 
-       dev_dbg(dax_pmem->dev, "%s\n", __func__);
+       dev_dbg(dax_pmem->dev, "trace\n");
        complete(&dax_pmem->cmp);
 }
 
@@ -43,7 +43,7 @@ static void dax_pmem_percpu_exit(void *data)
        struct percpu_ref *ref = data;
        struct dax_pmem *dax_pmem = to_dax_pmem(ref);
 
-       dev_dbg(dax_pmem->dev, "%s\n", __func__);
+       dev_dbg(dax_pmem->dev, "trace\n");
        wait_for_completion(&dax_pmem->cmp);
        percpu_ref_exit(ref);
 }
@@ -53,7 +53,7 @@ static void dax_pmem_percpu_kill(void *data)
        struct percpu_ref *ref = data;
        struct dax_pmem *dax_pmem = to_dax_pmem(ref);
 
-       dev_dbg(dax_pmem->dev, "%s\n", __func__);
+       dev_dbg(dax_pmem->dev, "trace\n");
        percpu_ref_kill(ref);
 }
 
@@ -150,17 +150,7 @@ static struct nd_device_driver dax_pmem_driver = {
        .type = ND_DRIVER_DAX_PMEM,
 };
 
-static int __init dax_pmem_init(void)
-{
-       return nd_driver_register(&dax_pmem_driver);
-}
-module_init(dax_pmem_init);
-
-static void __exit dax_pmem_exit(void)
-{
-       driver_unregister(&dax_pmem_driver.drv);
-}
-module_exit(dax_pmem_exit);
+module_nd_driver(dax_pmem_driver);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Intel Corporation");
index ecdc292aa4e4d861552e4646cdf7bf6bd5c24ea6..2b2332b605e4f52bc3cdb8519c7af44889fe80be 100644 (file)
@@ -124,10 +124,19 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
                return len < 0 ? len : -EIO;
        }
 
-       if ((IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn))
-                       || pfn_t_devmap(pfn))
+       if (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) {
+               /*
+                * An arch that has enabled the pmem api should also
+                * have its drivers support pfn_t_devmap()
+                *
+                * This is a developer warning and should not trigger in
+                * production. dax_flush() will crash since it depends
+                * on being able to do (page_address(pfn_to_page())).
+                */
+               WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
+       } else if (pfn_t_devmap(pfn)) {
                /* pass */;
-       else {
+       } else {
                pr_debug("VFS (%s): error: dax support not enabled\n",
                                sb->s_id);
                return -EOPNOTSUPP;
index 27df3e2837fdec03212085a5dc433307989baea2..6d61cd0236339172b2b1075134db65ed43a2f47d 100644 (file)
@@ -187,6 +187,16 @@ config DMA_SUN6I
        help
          Support for the DMA engine first found in Allwinner A31 SoCs.
 
+config DW_AXI_DMAC
+       tristate "Synopsys DesignWare AXI DMA support"
+       depends on OF || COMPILE_TEST
+       select DMA_ENGINE
+       select DMA_VIRTUAL_CHANNELS
+       help
+         Enable support for Synopsys DesignWare AXI DMA controller.
+         NOTE: This driver wasn't tested on 64 bit platform because
+         of lack 64 bit platform with Synopsys DW AXI DMAC.
+
 config EP93XX_DMA
        bool "Cirrus Logic EP93xx DMA support"
        depends on ARCH_EP93XX || COMPILE_TEST
@@ -633,6 +643,8 @@ config ZX_DMA
 # driver files
 source "drivers/dma/bestcomm/Kconfig"
 
+source "drivers/dma/mediatek/Kconfig"
+
 source "drivers/dma/qcom/Kconfig"
 
 source "drivers/dma/dw/Kconfig"
index b9dca8a0e142067d01bc302952cc7f962c56ff5a..0f62a4d49aabc91a13e1194df562cfb0d3a88aeb 100644 (file)
@@ -28,6 +28,7 @@ obj-$(CONFIG_DMA_OMAP) += omap-dma.o
 obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o
 obj-$(CONFIG_DMA_SUN4I) += sun4i-dma.o
 obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o
+obj-$(CONFIG_DW_AXI_DMAC) += dw-axi-dmac/
 obj-$(CONFIG_DW_DMAC_CORE) += dw/
 obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o
 obj-$(CONFIG_FSL_DMA) += fsldma.o
@@ -75,5 +76,6 @@ obj-$(CONFIG_XGENE_DMA) += xgene-dma.o
 obj-$(CONFIG_ZX_DMA) += zx_dma.o
 obj-$(CONFIG_ST_FDMA) += st_fdma.o
 
+obj-y += mediatek/
 obj-y += qcom/
 obj-y += xilinx/
index c00e3923d7d81154c7b0157491f270e90dd78fdf..94236ec9d4100fd6f1c0673f6c49e1cfd914d234 100644 (file)
@@ -1471,10 +1471,10 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
        for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) {
                check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
                rmb();
-               initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD);
-               rmb();
                cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
                rmb();
+               initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD);
+               rmb();
                cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
                rmb();
 
index 80cc2be6483cb87074b4a11e205f8b15c7b715e3..b9339524d5bd38859e00e701c062acda5dba69e7 100644 (file)
@@ -74,7 +74,11 @@ MODULE_PARM_DESC(timeout, "Transfer Timeout in msec (default: 3000), "
 
 static bool noverify;
 module_param(noverify, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(noverify, "Disable random data setup and verification");
+MODULE_PARM_DESC(noverify, "Disable data verification (default: verify)");
+
+static bool norandom;
+module_param(norandom, bool, 0644);
+MODULE_PARM_DESC(norandom, "Disable random offset setup (default: random)");
 
 static bool verbose;
 module_param(verbose, bool, S_IRUGO | S_IWUSR);
@@ -103,6 +107,7 @@ struct dmatest_params {
        unsigned int    pq_sources;
        int             timeout;
        bool            noverify;
+       bool            norandom;
 };
 
 /**
@@ -575,7 +580,7 @@ static int dmatest_func(void *data)
                        break;
                }
 
-               if (params->noverify)
+               if (params->norandom)
                        len = params->buf_size;
                else
                        len = dmatest_random() % params->buf_size + 1;
@@ -586,17 +591,19 @@ static int dmatest_func(void *data)
 
                total_len += len;
 
-               if (params->noverify) {
+               if (params->norandom) {
                        src_off = 0;
                        dst_off = 0;
                } else {
-                       start = ktime_get();
                        src_off = dmatest_random() % (params->buf_size - len + 1);
                        dst_off = dmatest_random() % (params->buf_size - len + 1);
 
                        src_off = (src_off >> align) << align;
                        dst_off = (dst_off >> align) << align;
+               }
 
+               if (!params->noverify) {
+                       start = ktime_get();
                        dmatest_init_srcs(thread->srcs, src_off, len,
                                          params->buf_size, is_memset);
                        dmatest_init_dsts(thread->dsts, dst_off, len,
@@ -975,6 +982,7 @@ static void run_threaded_test(struct dmatest_info *info)
        params->pq_sources = pq_sources;
        params->timeout = timeout;
        params->noverify = noverify;
+       params->norandom = norandom;
 
        request_channels(info, DMA_MEMCPY);
        request_channels(info, DMA_MEMSET);
diff --git a/drivers/dma/dw-axi-dmac/Makefile b/drivers/dma/dw-axi-dmac/Makefile
new file mode 100644 (file)
index 0000000..4bfa462
--- /dev/null
@@ -0,0 +1 @@
+obj-$(CONFIG_DW_AXI_DMAC) += dw-axi-dmac-platform.o
diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
new file mode 100644 (file)
index 0000000..c4eb55e
--- /dev/null
@@ -0,0 +1,1008 @@
+// SPDX-License-Identifier:  GPL-2.0
+// (C) 2017-2018 Synopsys, Inc. (www.synopsys.com)
+
+/*
+ * Synopsys DesignWare AXI DMA Controller driver.
+ *
+ * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/property.h>
+#include <linux/types.h>
+
+#include "dw-axi-dmac.h"
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+/*
+ * The set of bus widths supported by the DMA controller. DW AXI DMAC supports
+ * master data bus width up to 512 bits (for both AXI master interfaces), but
+ * it depends on IP block configurarion.
+ */
+#define AXI_DMA_BUSWIDTHS                \
+       (DMA_SLAVE_BUSWIDTH_1_BYTE      | \
+       DMA_SLAVE_BUSWIDTH_2_BYTES      | \
+       DMA_SLAVE_BUSWIDTH_4_BYTES      | \
+       DMA_SLAVE_BUSWIDTH_8_BYTES      | \
+       DMA_SLAVE_BUSWIDTH_16_BYTES     | \
+       DMA_SLAVE_BUSWIDTH_32_BYTES     | \
+       DMA_SLAVE_BUSWIDTH_64_BYTES)
+
+static inline void
+axi_dma_iowrite32(struct axi_dma_chip *chip, u32 reg, u32 val)
+{
+       iowrite32(val, chip->regs + reg);
+}
+
+static inline u32 axi_dma_ioread32(struct axi_dma_chip *chip, u32 reg)
+{
+       return ioread32(chip->regs + reg);
+}
+
+static inline void
+axi_chan_iowrite32(struct axi_dma_chan *chan, u32 reg, u32 val)
+{
+       iowrite32(val, chan->chan_regs + reg);
+}
+
+static inline u32 axi_chan_ioread32(struct axi_dma_chan *chan, u32 reg)
+{
+       return ioread32(chan->chan_regs + reg);
+}
+
+static inline void
+axi_chan_iowrite64(struct axi_dma_chan *chan, u32 reg, u64 val)
+{
+       /*
+        * We split one 64 bit write for two 32 bit write as some HW doesn't
+        * support 64 bit access.
+        */
+       iowrite32(lower_32_bits(val), chan->chan_regs + reg);
+       iowrite32(upper_32_bits(val), chan->chan_regs + reg + 4);
+}
+
+static inline void axi_dma_disable(struct axi_dma_chip *chip)
+{
+       u32 val;
+
+       val = axi_dma_ioread32(chip, DMAC_CFG);
+       val &= ~DMAC_EN_MASK;
+       axi_dma_iowrite32(chip, DMAC_CFG, val);
+}
+
+static inline void axi_dma_enable(struct axi_dma_chip *chip)
+{
+       u32 val;
+
+       val = axi_dma_ioread32(chip, DMAC_CFG);
+       val |= DMAC_EN_MASK;
+       axi_dma_iowrite32(chip, DMAC_CFG, val);
+}
+
+static inline void axi_dma_irq_disable(struct axi_dma_chip *chip)
+{
+       u32 val;
+
+       val = axi_dma_ioread32(chip, DMAC_CFG);
+       val &= ~INT_EN_MASK;
+       axi_dma_iowrite32(chip, DMAC_CFG, val);
+}
+
+static inline void axi_dma_irq_enable(struct axi_dma_chip *chip)
+{
+       u32 val;
+
+       val = axi_dma_ioread32(chip, DMAC_CFG);
+       val |= INT_EN_MASK;
+       axi_dma_iowrite32(chip, DMAC_CFG, val);
+}
+
+static inline void axi_chan_irq_disable(struct axi_dma_chan *chan, u32 irq_mask)
+{
+       u32 val;
+
+       if (likely(irq_mask == DWAXIDMAC_IRQ_ALL)) {
+               axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, DWAXIDMAC_IRQ_NONE);
+       } else {
+               val = axi_chan_ioread32(chan, CH_INTSTATUS_ENA);
+               val &= ~irq_mask;
+               axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, val);
+       }
+}
+
+static inline void axi_chan_irq_set(struct axi_dma_chan *chan, u32 irq_mask)
+{
+       axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, irq_mask);
+}
+
+static inline void axi_chan_irq_sig_set(struct axi_dma_chan *chan, u32 irq_mask)
+{
+       axi_chan_iowrite32(chan, CH_INTSIGNAL_ENA, irq_mask);
+}
+
+static inline void axi_chan_irq_clear(struct axi_dma_chan *chan, u32 irq_mask)
+{
+       axi_chan_iowrite32(chan, CH_INTCLEAR, irq_mask);
+}
+
+static inline u32 axi_chan_irq_read(struct axi_dma_chan *chan)
+{
+       return axi_chan_ioread32(chan, CH_INTSTATUS);
+}
+
+static inline void axi_chan_disable(struct axi_dma_chan *chan)
+{
+       u32 val;
+
+       val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+       val &= ~(BIT(chan->id) << DMAC_CHAN_EN_SHIFT);
+       val |=   BIT(chan->id) << DMAC_CHAN_EN_WE_SHIFT;
+       axi_dma_iowrite32(chan->chip, DMAC_CHEN, val);
+}
+
+static inline void axi_chan_enable(struct axi_dma_chan *chan)
+{
+       u32 val;
+
+       val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+       val |= BIT(chan->id) << DMAC_CHAN_EN_SHIFT |
+              BIT(chan->id) << DMAC_CHAN_EN_WE_SHIFT;
+       axi_dma_iowrite32(chan->chip, DMAC_CHEN, val);
+}
+
+static inline bool axi_chan_is_hw_enable(struct axi_dma_chan *chan)
+{
+       u32 val;
+
+       val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+
+       return !!(val & (BIT(chan->id) << DMAC_CHAN_EN_SHIFT));
+}
+
+static void axi_dma_hw_init(struct axi_dma_chip *chip)
+{
+       u32 i;
+
+       for (i = 0; i < chip->dw->hdata->nr_channels; i++) {
+               axi_chan_irq_disable(&chip->dw->chan[i], DWAXIDMAC_IRQ_ALL);
+               axi_chan_disable(&chip->dw->chan[i]);
+       }
+}
+
+static u32 axi_chan_get_xfer_width(struct axi_dma_chan *chan, dma_addr_t src,
+                                  dma_addr_t dst, size_t len)
+{
+       u32 max_width = chan->chip->dw->hdata->m_data_width;
+
+       return __ffs(src | dst | len | BIT(max_width));
+}
+
+static inline const char *axi_chan_name(struct axi_dma_chan *chan)
+{
+       return dma_chan_name(&chan->vc.chan);
+}
+
+static struct axi_dma_desc *axi_desc_get(struct axi_dma_chan *chan)
+{
+       struct dw_axi_dma *dw = chan->chip->dw;
+       struct axi_dma_desc *desc;
+       dma_addr_t phys;
+
+       desc = dma_pool_zalloc(dw->desc_pool, GFP_NOWAIT, &phys);
+       if (unlikely(!desc)) {
+               dev_err(chan2dev(chan), "%s: not enough descriptors available\n",
+                       axi_chan_name(chan));
+               return NULL;
+       }
+
+       atomic_inc(&chan->descs_allocated);
+       INIT_LIST_HEAD(&desc->xfer_list);
+       desc->vd.tx.phys = phys;
+       desc->chan = chan;
+
+       return desc;
+}
+
+static void axi_desc_put(struct axi_dma_desc *desc)
+{
+       struct axi_dma_chan *chan = desc->chan;
+       struct dw_axi_dma *dw = chan->chip->dw;
+       struct axi_dma_desc *child, *_next;
+       unsigned int descs_put = 0;
+
+       list_for_each_entry_safe(child, _next, &desc->xfer_list, xfer_list) {
+               list_del(&child->xfer_list);
+               dma_pool_free(dw->desc_pool, child, child->vd.tx.phys);
+               descs_put++;
+       }
+
+       dma_pool_free(dw->desc_pool, desc, desc->vd.tx.phys);
+       descs_put++;
+
+       atomic_sub(descs_put, &chan->descs_allocated);
+       dev_vdbg(chan2dev(chan), "%s: %d descs put, %d still allocated\n",
+               axi_chan_name(chan), descs_put,
+               atomic_read(&chan->descs_allocated));
+}
+
+static void vchan_desc_put(struct virt_dma_desc *vdesc)
+{
+       axi_desc_put(vd_to_axi_desc(vdesc));
+}
+
+static enum dma_status
+dma_chan_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
+                 struct dma_tx_state *txstate)
+{
+       struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+       enum dma_status ret;
+
+       ret = dma_cookie_status(dchan, cookie, txstate);
+
+       if (chan->is_paused && ret == DMA_IN_PROGRESS)
+               ret = DMA_PAUSED;
+
+       return ret;
+}
+
+static void write_desc_llp(struct axi_dma_desc *desc, dma_addr_t adr)
+{
+       desc->lli.llp = cpu_to_le64(adr);
+}
+
+static void write_chan_llp(struct axi_dma_chan *chan, dma_addr_t adr)
+{
+       axi_chan_iowrite64(chan, CH_LLP, adr);
+}
+
+/* Called in chan locked context */
+static void axi_chan_block_xfer_start(struct axi_dma_chan *chan,
+                                     struct axi_dma_desc *first)
+{
+       u32 priority = chan->chip->dw->hdata->priority[chan->id];
+       u32 reg, irq_mask;
+       u8 lms = 0; /* Select AXI0 master for LLI fetching */
+
+       /* Refuse to reprogram a channel the hardware still reports enabled */
+       if (unlikely(axi_chan_is_hw_enable(chan))) {
+               dev_err(chan2dev(chan), "%s is non-idle!\n",
+                       axi_chan_name(chan));
+
+               return;
+       }
+
+       axi_dma_enable(chan->chip);
+
+       /* Multi-block transfers via linked list for both directions */
+       reg = (DWAXIDMAC_MBLK_TYPE_LL << CH_CFG_L_DST_MULTBLK_TYPE_POS |
+              DWAXIDMAC_MBLK_TYPE_LL << CH_CFG_L_SRC_MULTBLK_TYPE_POS);
+       axi_chan_iowrite32(chan, CH_CFG_L, reg);
+
+       /* mem-to-mem, DMAC as flow controller, HW handshake on both sides */
+       reg = (DWAXIDMAC_TT_FC_MEM_TO_MEM_DMAC << CH_CFG_H_TT_FC_POS |
+              priority << CH_CFG_H_PRIORITY_POS |
+              DWAXIDMAC_HS_SEL_HW << CH_CFG_H_HS_SEL_DST_POS |
+              DWAXIDMAC_HS_SEL_HW << CH_CFG_H_HS_SEL_SRC_POS);
+       axi_chan_iowrite32(chan, CH_CFG_H, reg);
+
+       /* Low bits of LLP select the LLI-fetch master (lms == AXI0) */
+       write_chan_llp(chan, first->vd.tx.phys | lms);
+
+       irq_mask = DWAXIDMAC_IRQ_DMA_TRF | DWAXIDMAC_IRQ_ALL_ERR;
+       axi_chan_irq_sig_set(chan, irq_mask);
+
+       /* Generate 'suspend' status but don't generate interrupt */
+       irq_mask |= DWAXIDMAC_IRQ_SUSPENDED;
+       axi_chan_irq_set(chan, irq_mask);
+
+       axi_chan_enable(chan);
+}
+
+/*
+ * Kick off the descriptor at the head of the issued list, if any.
+ * Caller must hold chan->vc.lock.
+ */
+static void axi_chan_start_first_queued(struct axi_dma_chan *chan)
+{
+       struct axi_dma_desc *desc;
+       struct virt_dma_desc *vd;
+
+       vd = vchan_next_desc(&chan->vc);
+       if (!vd)
+               return;
+
+       desc = vd_to_axi_desc(vd);
+       dev_vdbg(chan2dev(chan), "%s: started %u\n", axi_chan_name(chan),
+               vd->tx.cookie);
+       axi_chan_block_xfer_start(chan, desc);
+}
+
+/* dmaengine .device_issue_pending: move submitted descs to the issued list
+ * and start the first one if the channel was idle. */
+static void dma_chan_issue_pending(struct dma_chan *dchan)
+{
+       struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+       unsigned long flags;
+
+       spin_lock_irqsave(&chan->vc.lock, flags);
+       if (vchan_issue_pending(&chan->vc))
+               axi_chan_start_first_queued(chan);
+       spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+/*
+ * dmaengine .device_alloc_chan_resources: descriptors come from a dma_pool
+ * on demand, so only take a runtime-PM reference here.
+ * Returns 0, or -EBUSY if the hardware channel is unexpectedly enabled.
+ */
+static int dma_chan_alloc_chan_resources(struct dma_chan *dchan)
+{
+       struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+
+       /* ASSERT: channel is idle */
+       if (axi_chan_is_hw_enable(chan)) {
+               dev_err(chan2dev(chan), "%s is non-idle!\n",
+                       axi_chan_name(chan));
+               return -EBUSY;
+       }
+
+       dev_vdbg(dchan2dev(dchan), "%s: allocating\n", axi_chan_name(chan));
+
+       /* Balanced by pm_runtime_put() in dma_chan_free_chan_resources() */
+       pm_runtime_get(chan->chip->dev);
+
+       return 0;
+}
+
+/*
+ * dmaengine .device_free_chan_resources: quiesce the hardware channel,
+ * free any virt-dma descriptors and drop the runtime-PM reference.
+ */
+static void dma_chan_free_chan_resources(struct dma_chan *dchan)
+{
+       struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+
+       /* ASSERT: channel is idle */
+       if (axi_chan_is_hw_enable(chan))
+               dev_err(dchan2dev(dchan), "%s is non-idle!\n",
+                       axi_chan_name(chan));
+
+       axi_chan_disable(chan);
+       axi_chan_irq_disable(chan, DWAXIDMAC_IRQ_ALL);
+
+       vchan_free_chan_resources(&chan->vc);
+
+       dev_vdbg(dchan2dev(dchan),
+                "%s: free resources, descriptor still allocated: %u\n",
+                axi_chan_name(chan), atomic_read(&chan->descs_allocated));
+
+       pm_runtime_put(chan->chip->dev);
+}
+
+/*
+ * If DW_axi_dmac sees CHx_CTL.ShadowReg_Or_LLI_Last bit of the fetched LLI
+ * as 1, it understands that the current block is the final block in the
+ * transfer and completes the DMA transfer operation at the end of current
+ * block transfer.
+ */
+static void set_desc_last(struct axi_dma_desc *desc)
+{
+       u32 val;
+
+       /* Read-modify-write through CPU byte order; LLI fields are __le32 */
+       val = le32_to_cpu(desc->lli.ctl_hi);
+       val |= CH_CTL_H_LLI_LAST;
+       desc->lli.ctl_hi = cpu_to_le32(val);
+}
+
+/* Set the LLI source address register field (little-endian) */
+static void write_desc_sar(struct axi_dma_desc *desc, dma_addr_t adr)
+{
+       desc->lli.sar = cpu_to_le64(adr);
+}
+
+/* Set the LLI destination address register field (little-endian) */
+static void write_desc_dar(struct axi_dma_desc *desc, dma_addr_t adr)
+{
+       desc->lli.dar = cpu_to_le64(adr);
+}
+
+static void set_desc_src_master(struct axi_dma_desc *desc)
+{
+       u32 val;
+
+       /* Select AXI0 for source master */
+       val = le32_to_cpu(desc->lli.ctl_lo);
+       val &= ~CH_CTL_L_SRC_MAST;
+       desc->lli.ctl_lo = cpu_to_le32(val);
+}
+
+static void set_desc_dest_master(struct axi_dma_desc *desc)
+{
+       u32 val;
+
+       /* Select AXI1 for destination master if available, else fall back
+        * to AXI0 on single-master configurations */
+       val = le32_to_cpu(desc->lli.ctl_lo);
+       if (desc->chan->chip->dw->hdata->nr_masters > 1)
+               val |= CH_CTL_L_DST_MAST;
+       else
+               val &= ~CH_CTL_L_DST_MAST;
+
+       desc->lli.ctl_lo = cpu_to_le32(val);
+}
+
+/*
+ * dmaengine .device_prep_dma_memcpy: build a linked list of LLIs covering
+ * @len bytes from @src_adr to @dst_adr, splitting at the per-channel
+ * max block size.  Returns the prepared tx descriptor, or NULL on
+ * allocation failure or zero length.
+ */
+static struct dma_async_tx_descriptor *
+dma_chan_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst_adr,
+                        dma_addr_t src_adr, size_t len, unsigned long flags)
+{
+       struct axi_dma_desc *first = NULL, *desc = NULL, *prev = NULL;
+       struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+       size_t block_ts, max_block_ts, xfer_len;
+       u32 xfer_width, reg;
+       u8 lms = 0; /* Select AXI0 master for LLI fetching */
+
+       dev_dbg(chan2dev(chan), "%s: memcpy: src: %pad dst: %pad length: %zd flags: %#lx",
+               axi_chan_name(chan), &src_adr, &dst_adr, len, flags);
+
+       max_block_ts = chan->chip->dw->hdata->block_size[chan->id];
+
+       while (len) {
+               xfer_len = len;
+
+               /*
+                * Take care for the alignment.
+                * Actually source and destination widths can be different, but
+                * make them same to be simpler.
+                */
+               xfer_width = axi_chan_get_xfer_width(chan, src_adr, dst_adr, xfer_len);
+
+               /*
+                * block_ts indicates the total number of data of width
+                * to be transferred in a DMA block transfer.
+                * BLOCK_TS register should be set to block_ts - 1
+                */
+               block_ts = xfer_len >> xfer_width;
+               if (block_ts > max_block_ts) {
+                       block_ts = max_block_ts;
+                       xfer_len = max_block_ts << xfer_width;
+               }
+
+               desc = axi_desc_get(chan);
+               if (unlikely(!desc))
+                       goto err_desc_get;
+
+               write_desc_sar(desc, src_adr);
+               write_desc_dar(desc, dst_adr);
+               desc->lli.block_ts_lo = cpu_to_le32(block_ts - 1);
+
+               /* Mark the LLI valid; optionally cap AXI burst length */
+               reg = CH_CTL_H_LLI_VALID;
+               if (chan->chip->dw->hdata->restrict_axi_burst_len) {
+                       u32 burst_len = chan->chip->dw->hdata->axi_rw_burst_len;
+
+                       reg |= (CH_CTL_H_ARLEN_EN |
+                               burst_len << CH_CTL_H_ARLEN_POS |
+                               CH_CTL_H_AWLEN_EN |
+                               burst_len << CH_CTL_H_AWLEN_POS);
+               }
+               desc->lli.ctl_hi = cpu_to_le32(reg);
+
+               /* Burst size 4, same width on both sides, incrementing addrs */
+               reg = (DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_DST_MSIZE_POS |
+                      DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_SRC_MSIZE_POS |
+                      xfer_width << CH_CTL_L_DST_WIDTH_POS |
+                      xfer_width << CH_CTL_L_SRC_WIDTH_POS |
+                      DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_DST_INC_POS |
+                      DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_SRC_INC_POS);
+               desc->lli.ctl_lo = cpu_to_le32(reg);
+
+               set_desc_src_master(desc);
+               set_desc_dest_master(desc);
+
+               /* Manage transfer list (xfer_list) */
+               if (!first) {
+                       first = desc;
+               } else {
+                       list_add_tail(&desc->xfer_list, &first->xfer_list);
+                       write_desc_llp(prev, desc->vd.tx.phys | lms);
+               }
+               prev = desc;
+
+               /* update the length and addresses for the next loop cycle */
+               len -= xfer_len;
+               dst_adr += xfer_len;
+               src_adr += xfer_len;
+       }
+
+       /* Total len of src/dest sg == 0, so no descriptor were allocated */
+       if (unlikely(!first))
+               return NULL;
+
+       /* Set end-of-link to the last link descriptor of list */
+       set_desc_last(desc);
+
+       return vchan_tx_prep(&chan->vc, &first->vd, flags);
+
+err_desc_get:
+       /* axi_desc_put() frees the whole chain hanging off @first */
+       axi_desc_put(first);
+       return NULL;
+}
+
+/* Dump one LLI's hardware fields to the log (used on error IRQs) */
+static void axi_chan_dump_lli(struct axi_dma_chan *chan,
+                             struct axi_dma_desc *desc)
+{
+       dev_err(dchan2dev(&chan->vc.chan),
+               "SAR: 0x%llx DAR: 0x%llx LLP: 0x%llx BTS 0x%x CTL: 0x%x:%08x",
+               le64_to_cpu(desc->lli.sar),
+               le64_to_cpu(desc->lli.dar),
+               le64_to_cpu(desc->lli.llp),
+               le32_to_cpu(desc->lli.block_ts_lo),
+               le32_to_cpu(desc->lli.ctl_hi),
+               le32_to_cpu(desc->lli.ctl_lo));
+}
+
+/* Dump the head LLI and every chained LLI of a descriptor list */
+static void axi_chan_list_dump_lli(struct axi_dma_chan *chan,
+                                  struct axi_dma_desc *desc_head)
+{
+       struct axi_dma_desc *desc;
+
+       axi_chan_dump_lli(chan, desc_head);
+       list_for_each_entry(desc, &desc_head->xfer_list, xfer_list)
+               axi_chan_dump_lli(chan, desc);
+}
+
+/*
+ * Handle an error interrupt: stop the channel, complete and dump the bad
+ * descriptor, then try to restart with the next queued one.  Guard against
+ * a spurious error IRQ arriving with an empty issued list, which would
+ * otherwise dereference a NULL virt_dma_desc.
+ */
+static noinline void axi_chan_handle_err(struct axi_dma_chan *chan, u32 status)
+{
+       struct virt_dma_desc *vd;
+       unsigned long flags;
+
+       spin_lock_irqsave(&chan->vc.lock, flags);
+
+       axi_chan_disable(chan);
+
+       /* The bad descriptor currently is in the head of vc list */
+       vd = vchan_next_desc(&chan->vc);
+       if (unlikely(!vd)) {
+               dev_err(chan2dev(chan), "BUG: %s, error IRQ 0x%08x with no descriptor\n",
+                       axi_chan_name(chan), status);
+               goto out;
+       }
+       /* Remove the failed descriptor from issued list */
+       list_del(&vd->node);
+
+       /* WARN about bad descriptor */
+       dev_err(chan2dev(chan),
+               "Bad descriptor submitted for %s, cookie: %d, irq: 0x%08x\n",
+               axi_chan_name(chan), vd->tx.cookie, status);
+       axi_chan_list_dump_lli(chan, vd_to_axi_desc(vd));
+
+       vchan_cookie_complete(vd);
+
+       /* Try to restart the controller */
+       axi_chan_start_first_queued(chan);
+
+out:
+       spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+/*
+ * Handle a transfer-complete interrupt: complete the descriptor at the
+ * head of the issued list and start the next queued one.  Guard against
+ * a completion IRQ arriving with an empty issued list (e.g. racing with
+ * terminate_all), which would otherwise dereference a NULL pointer.
+ */
+static void axi_chan_block_xfer_complete(struct axi_dma_chan *chan)
+{
+       struct virt_dma_desc *vd;
+       unsigned long flags;
+
+       spin_lock_irqsave(&chan->vc.lock, flags);
+       if (unlikely(axi_chan_is_hw_enable(chan))) {
+               dev_err(chan2dev(chan), "BUG: %s caught DWAXIDMAC_IRQ_DMA_TRF, but channel not idle!\n",
+                       axi_chan_name(chan));
+               axi_chan_disable(chan);
+       }
+
+       /* The completed descriptor currently is in the head of vc list */
+       vd = vchan_next_desc(&chan->vc);
+       if (unlikely(!vd)) {
+               dev_err(chan2dev(chan), "BUG: %s, IRQ with no descriptor\n",
+                       axi_chan_name(chan));
+               goto out;
+       }
+       /* Remove the completed descriptor from issued list before completing */
+       list_del(&vd->node);
+       vchan_cookie_complete(vd);
+
+       /* Submit queued descriptors after processing the completed ones */
+       axi_chan_start_first_queued(chan);
+
+out:
+       spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+/*
+ * Shared interrupt handler: with controller interrupts masked, poll every
+ * channel's status, clear it, and dispatch to error or completion handling.
+ */
+static irqreturn_t dw_axi_dma_interrupt(int irq, void *dev_id)
+{
+       struct axi_dma_chip *chip = dev_id;
+       struct dw_axi_dma *dw = chip->dw;
+       struct axi_dma_chan *chan;
+
+       u32 status, i;
+
+       /* Disable DMAC interrupts. We'll enable them after processing channels */
+       axi_dma_irq_disable(chip);
+
+       /* Poll, clear and process every channel interrupt status */
+       for (i = 0; i < dw->hdata->nr_channels; i++) {
+               chan = &dw->chan[i];
+               status = axi_chan_irq_read(chan);
+               axi_chan_irq_clear(chan, status);
+
+               dev_vdbg(chip->dev, "%s %u IRQ status: 0x%08x\n",
+                       axi_chan_name(chan), i, status);
+
+               if (status & DWAXIDMAC_IRQ_ALL_ERR)
+                       axi_chan_handle_err(chan, status);
+               else if (status & DWAXIDMAC_IRQ_DMA_TRF)
+                       axi_chan_block_xfer_complete(chan);
+       }
+
+       /* Re-enable interrupts */
+       axi_dma_irq_enable(chip);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * dmaengine .device_terminate_all: stop the hardware channel and free all
+ * submitted/issued descriptors without completing their cookies.
+ */
+static int dma_chan_terminate_all(struct dma_chan *dchan)
+{
+       struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+       unsigned long flags;
+       LIST_HEAD(head);
+
+       spin_lock_irqsave(&chan->vc.lock, flags);
+
+       axi_chan_disable(chan);
+
+       vchan_get_all_descriptors(&chan->vc, &head);
+
+       /*
+        * As vchan_dma_desc_free_list can access to desc_allocated list
+        * we need to call it in vc.lock context.
+        */
+       vchan_dma_desc_free_list(&chan->vc, &head);
+
+       spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+       dev_vdbg(dchan2dev(dchan), "terminated: %s\n", axi_chan_name(chan));
+
+       return 0;
+}
+
+/*
+ * dmaengine .device_pause: request channel suspend and poll (up to ~40us,
+ * under vc.lock with IRQs off) for the hardware to acknowledge.
+ * Returns 0 on success, -EAGAIN if the suspend was never acknowledged.
+ */
+static int dma_chan_pause(struct dma_chan *dchan)
+{
+       struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+       unsigned long flags;
+       unsigned int timeout = 20; /* timeout iterations */
+       u32 val;
+
+       spin_lock_irqsave(&chan->vc.lock, flags);
+
+       /* Write-enable bit must accompany the suspend bit */
+       val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+       val |= BIT(chan->id) << DMAC_CHAN_SUSP_SHIFT |
+              BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT;
+       axi_dma_iowrite32(chan->chip, DMAC_CHEN, val);
+
+       do  {
+               if (axi_chan_irq_read(chan) & DWAXIDMAC_IRQ_SUSPENDED)
+                       break;
+
+               udelay(2);
+       } while (--timeout);
+
+       axi_chan_irq_clear(chan, DWAXIDMAC_IRQ_SUSPENDED);
+
+       /* NOTE(review): is_paused is set even on timeout; the -EAGAIN return
+        * below still reports the failure — confirm callers handle it. */
+       chan->is_paused = true;
+
+       spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+       return timeout ? 0 : -EAGAIN;
+}
+
+/* Called in chan locked context */
+static inline void axi_chan_resume(struct axi_dma_chan *chan)
+{
+       u32 val;
+
+       /* Clear the suspend bit (write-enable bit must be set for the
+        * hardware to latch the change) */
+       val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+       val &= ~(BIT(chan->id) << DMAC_CHAN_SUSP_SHIFT);
+       val |=  (BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT);
+       axi_dma_iowrite32(chan->chip, DMAC_CHEN, val);
+
+       chan->is_paused = false;
+}
+
+/* dmaengine .device_resume: un-suspend the channel if it was paused */
+static int dma_chan_resume(struct dma_chan *dchan)
+{
+       struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+       unsigned long flags;
+
+       spin_lock_irqsave(&chan->vc.lock, flags);
+
+       if (chan->is_paused)
+               axi_chan_resume(chan);
+
+       spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+       return 0;
+}
+
+/* Quiesce the controller and gate both clocks (runtime-PM suspend path) */
+static int axi_dma_suspend(struct axi_dma_chip *chip)
+{
+       axi_dma_irq_disable(chip);
+       axi_dma_disable(chip);
+
+       clk_disable_unprepare(chip->core_clk);
+       clk_disable_unprepare(chip->cfgr_clk);
+
+       return 0;
+}
+
+/*
+ * Re-enable clocks, the controller and its interrupts (runtime-PM resume
+ * path).  If enabling the core clock fails, the already-enabled cfgr
+ * clock must be released again, otherwise its enable count leaks.
+ */
+static int axi_dma_resume(struct axi_dma_chip *chip)
+{
+       int ret;
+
+       ret = clk_prepare_enable(chip->cfgr_clk);
+       if (ret < 0)
+               return ret;
+
+       ret = clk_prepare_enable(chip->core_clk);
+       if (ret < 0) {
+               /* Undo the cfgr_clk enable so the clocks stay balanced */
+               clk_disable_unprepare(chip->cfgr_clk);
+               return ret;
+       }
+
+       axi_dma_enable(chip);
+       axi_dma_irq_enable(chip);
+
+       return 0;
+}
+
+/* Runtime-PM callback wrapper around axi_dma_suspend() */
+static int __maybe_unused axi_dma_runtime_suspend(struct device *dev)
+{
+       struct axi_dma_chip *chip = dev_get_drvdata(dev);
+
+       return axi_dma_suspend(chip);
+}
+
+/* Runtime-PM callback wrapper around axi_dma_resume() */
+static int __maybe_unused axi_dma_runtime_resume(struct device *dev)
+{
+       struct axi_dma_chip *chip = dev_get_drvdata(dev);
+
+       return axi_dma_resume(chip);
+}
+
+/*
+ * Read and validate the firmware (DT/ACPI) properties into chip->dw->hdata.
+ * "dma-channels", "snps,dma-masters", "snps,data-width", "snps,block-size"
+ * and "snps,priority" are mandatory; "snps,axi-max-burst-len" is optional.
+ * Returns 0, a device_property_* error, or -EINVAL on out-of-range values.
+ */
+static int parse_device_properties(struct axi_dma_chip *chip)
+{
+       struct device *dev = chip->dev;
+       u32 tmp, carr[DMAC_MAX_CHANNELS];
+       int ret;
+
+       ret = device_property_read_u32(dev, "dma-channels", &tmp);
+       if (ret)
+               return ret;
+       if (tmp == 0 || tmp > DMAC_MAX_CHANNELS)
+               return -EINVAL;
+
+       chip->dw->hdata->nr_channels = tmp;
+
+       ret = device_property_read_u32(dev, "snps,dma-masters", &tmp);
+       if (ret)
+               return ret;
+       if (tmp == 0 || tmp > DMAC_MAX_MASTERS)
+               return -EINVAL;
+
+       chip->dw->hdata->nr_masters = tmp;
+
+       ret = device_property_read_u32(dev, "snps,data-width", &tmp);
+       if (ret)
+               return ret;
+       if (tmp > DWAXIDMAC_TRANS_WIDTH_MAX)
+               return -EINVAL;
+
+       chip->dw->hdata->m_data_width = tmp;
+
+       /* Per-channel maximum block size, one entry per channel */
+       ret = device_property_read_u32_array(dev, "snps,block-size", carr,
+                                            chip->dw->hdata->nr_channels);
+       if (ret)
+               return ret;
+       for (tmp = 0; tmp < chip->dw->hdata->nr_channels; tmp++) {
+               if (carr[tmp] == 0 || carr[tmp] > DMAC_MAX_BLK_SIZE)
+                       return -EINVAL;
+
+               chip->dw->hdata->block_size[tmp] = carr[tmp];
+       }
+
+       ret = device_property_read_u32_array(dev, "snps,priority", carr,
+                                            chip->dw->hdata->nr_channels);
+       if (ret)
+               return ret;
+       /* Priority value must be programmed within [0:nr_channels-1] range */
+       for (tmp = 0; tmp < chip->dw->hdata->nr_channels; tmp++) {
+               if (carr[tmp] >= chip->dw->hdata->nr_channels)
+                       return -EINVAL;
+
+               chip->dw->hdata->priority[tmp] = carr[tmp];
+       }
+
+       /* axi-max-burst-len is optional property */
+       ret = device_property_read_u32(dev, "snps,axi-max-burst-len", &tmp);
+       if (!ret) {
+               if (tmp > DWAXIDMAC_ARWLEN_MAX + 1)
+                       return -EINVAL;
+               if (tmp < DWAXIDMAC_ARWLEN_MIN + 1)
+                       return -EINVAL;
+
+               /* Hardware encodes burst length as (beats - 1) */
+               chip->dw->hdata->restrict_axi_burst_len = true;
+               chip->dw->hdata->axi_rw_burst_len = tmp - 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Platform probe: allocate driver state, map registers, acquire clocks,
+ * read firmware properties, set up every channel and register the
+ * dmaengine device.  All resources are devm-managed except the runtime-PM
+ * enable, which is undone on the error path.
+ */
+static int dw_probe(struct platform_device *pdev)
+{
+       struct axi_dma_chip *chip;
+       struct resource *mem;
+       struct dw_axi_dma *dw;
+       struct dw_axi_dma_hcfg *hdata;
+       u32 i;
+       int ret;
+
+       chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
+       if (!chip)
+               return -ENOMEM;
+
+       dw = devm_kzalloc(&pdev->dev, sizeof(*dw), GFP_KERNEL);
+       if (!dw)
+               return -ENOMEM;
+
+       hdata = devm_kzalloc(&pdev->dev, sizeof(*hdata), GFP_KERNEL);
+       if (!hdata)
+               return -ENOMEM;
+
+       chip->dw = dw;
+       chip->dev = &pdev->dev;
+       chip->dw->hdata = hdata;
+
+       chip->irq = platform_get_irq(pdev, 0);
+       if (chip->irq < 0)
+               return chip->irq;
+
+       mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       chip->regs = devm_ioremap_resource(chip->dev, mem);
+       if (IS_ERR(chip->regs))
+               return PTR_ERR(chip->regs);
+
+       chip->core_clk = devm_clk_get(chip->dev, "core-clk");
+       if (IS_ERR(chip->core_clk))
+               return PTR_ERR(chip->core_clk);
+
+       chip->cfgr_clk = devm_clk_get(chip->dev, "cfgr-clk");
+       if (IS_ERR(chip->cfgr_clk))
+               return PTR_ERR(chip->cfgr_clk);
+
+       ret = parse_device_properties(chip);
+       if (ret)
+               return ret;
+
+       dw->chan = devm_kcalloc(chip->dev, hdata->nr_channels,
+                               sizeof(*dw->chan), GFP_KERNEL);
+       if (!dw->chan)
+               return -ENOMEM;
+
+       ret = devm_request_irq(chip->dev, chip->irq, dw_axi_dma_interrupt,
+                              IRQF_SHARED, KBUILD_MODNAME, chip);
+       if (ret)
+               return ret;
+
+       /* Lli address must be aligned to a 64-byte boundary */
+       dw->desc_pool = dmam_pool_create(KBUILD_MODNAME, chip->dev,
+                                        sizeof(struct axi_dma_desc), 64, 0);
+       if (!dw->desc_pool) {
+               dev_err(chip->dev, "No memory for descriptors dma pool\n");
+               return -ENOMEM;
+       }
+
+       /* Initialize one virt-dma channel per hardware channel */
+       INIT_LIST_HEAD(&dw->dma.channels);
+       for (i = 0; i < hdata->nr_channels; i++) {
+               struct axi_dma_chan *chan = &dw->chan[i];
+
+               chan->chip = chip;
+               chan->id = i;
+               chan->chan_regs = chip->regs + COMMON_REG_LEN + i * CHAN_REG_LEN;
+               atomic_set(&chan->descs_allocated, 0);
+
+               chan->vc.desc_free = vchan_desc_put;
+               vchan_init(&chan->vc, &dw->dma);
+       }
+
+       /* Set capabilities */
+       dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
+
+       /* DMA capabilities */
+       dw->dma.chancnt = hdata->nr_channels;
+       dw->dma.src_addr_widths = AXI_DMA_BUSWIDTHS;
+       dw->dma.dst_addr_widths = AXI_DMA_BUSWIDTHS;
+       dw->dma.directions = BIT(DMA_MEM_TO_MEM);
+       dw->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+
+       dw->dma.dev = chip->dev;
+       dw->dma.device_tx_status = dma_chan_tx_status;
+       dw->dma.device_issue_pending = dma_chan_issue_pending;
+       dw->dma.device_terminate_all = dma_chan_terminate_all;
+       dw->dma.device_pause = dma_chan_pause;
+       dw->dma.device_resume = dma_chan_resume;
+
+       dw->dma.device_alloc_chan_resources = dma_chan_alloc_chan_resources;
+       dw->dma.device_free_chan_resources = dma_chan_free_chan_resources;
+
+       dw->dma.device_prep_dma_memcpy = dma_chan_prep_dma_memcpy;
+
+       platform_set_drvdata(pdev, chip);
+
+       pm_runtime_enable(chip->dev);
+
+       /*
+        * We can't just call pm_runtime_get here instead of
+        * pm_runtime_get_noresume + axi_dma_resume because we need
+        * driver to work also without Runtime PM.
+        */
+       pm_runtime_get_noresume(chip->dev);
+       ret = axi_dma_resume(chip);
+       if (ret < 0)
+               goto err_pm_disable;
+
+       axi_dma_hw_init(chip);
+
+       pm_runtime_put(chip->dev);
+
+       ret = dma_async_device_register(&dw->dma);
+       if (ret)
+               goto err_pm_disable;
+
+       dev_info(chip->dev, "DesignWare AXI DMA Controller, %d channels\n",
+                dw->hdata->nr_channels);
+
+       return 0;
+
+err_pm_disable:
+       pm_runtime_disable(chip->dev);
+
+       return ret;
+}
+
+/*
+ * Platform remove: quiesce the hardware (clocks must be on to touch
+ * registers), tear down runtime PM, free the IRQ, drain the channel
+ * lists and unregister the dmaengine device.
+ */
+static int dw_remove(struct platform_device *pdev)
+{
+       struct axi_dma_chip *chip = platform_get_drvdata(pdev);
+       struct dw_axi_dma *dw = chip->dw;
+       struct axi_dma_chan *chan, *_chan;
+       u32 i;
+
+       /* Enable clk before accessing to registers */
+       clk_prepare_enable(chip->cfgr_clk);
+       clk_prepare_enable(chip->core_clk);
+       axi_dma_irq_disable(chip);
+       for (i = 0; i < dw->hdata->nr_channels; i++) {
+               axi_chan_disable(&chip->dw->chan[i]);
+               axi_chan_irq_disable(&chip->dw->chan[i], DWAXIDMAC_IRQ_ALL);
+       }
+       axi_dma_disable(chip);
+
+       pm_runtime_disable(chip->dev);
+       axi_dma_suspend(chip);
+
+       /* IRQ is freed explicitly so no handler can run past this point */
+       devm_free_irq(chip->dev, chip->irq, chip);
+
+       list_for_each_entry_safe(chan, _chan, &dw->dma.channels,
+                       vc.chan.device_node) {
+               list_del(&chan->vc.chan.device_node);
+               tasklet_kill(&chan->vc.task);
+       }
+
+       /* NOTE(review): unregistering after emptying the channel list —
+        * confirm dma_async_device_unregister() tolerates this ordering. */
+       dma_async_device_unregister(&dw->dma);
+
+       return 0;
+}
+
+/* Runtime-PM only; system sleep is not handled here */
+static const struct dev_pm_ops dw_axi_dma_pm_ops = {
+       SET_RUNTIME_PM_OPS(axi_dma_runtime_suspend, axi_dma_runtime_resume, NULL)
+};
+
+/* Device-tree match table */
+static const struct of_device_id dw_dma_of_id_table[] = {
+       { .compatible = "snps,axi-dma-1.01a" },
+       {}
+};
+MODULE_DEVICE_TABLE(of, dw_dma_of_id_table);
+
+static struct platform_driver dw_driver = {
+       .probe          = dw_probe,
+       .remove         = dw_remove,
+       .driver = {
+               .name   = KBUILD_MODNAME,
+               .of_match_table = of_match_ptr(dw_dma_of_id_table),
+               .pm = &dw_axi_dma_pm_ops,
+       },
+};
+module_platform_driver(dw_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Synopsys DesignWare AXI DMA Controller platform driver");
+MODULE_AUTHOR("Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>");
diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac.h b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
new file mode 100644 (file)
index 0000000..f8888dc
--- /dev/null
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier:  GPL-2.0
+// (C) 2017-2018 Synopsys, Inc. (www.synopsys.com)
+
+/*
+ * Synopsys DesignWare AXI DMA Controller driver.
+ *
+ * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+ */
+
+#ifndef _AXI_DMA_PLATFORM_H
+#define _AXI_DMA_PLATFORM_H
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/types.h>
+
+#include "../virt-dma.h"
+
+#define DMAC_MAX_CHANNELS      8
+#define DMAC_MAX_MASTERS       2
+#define DMAC_MAX_BLK_SIZE      0x200000
+
+/* Hardware configuration read from DT/ACPI by parse_device_properties() */
+struct dw_axi_dma_hcfg {
+       u32     nr_channels;
+       u32     nr_masters;
+       u32     m_data_width;
+       u32     block_size[DMAC_MAX_CHANNELS];
+       u32     priority[DMAC_MAX_CHANNELS];
+       /* maximum supported axi burst length */
+       u32     axi_rw_burst_len;
+       bool    restrict_axi_burst_len;
+};
+
+/* Per-channel driver state wrapping a virt-dma channel */
+struct axi_dma_chan {
+       struct axi_dma_chip             *chip;
+       void __iomem                    *chan_regs;     /* per-channel register window */
+       u8                              id;             /* hardware channel index */
+       atomic_t                        descs_allocated;
+
+       struct virt_dma_chan            vc;
+
+       /* these other elements are all protected by vc.lock */
+       bool                            is_paused;
+};
+
+/* dmaengine device plus hardware config and the channel array */
+struct dw_axi_dma {
+       struct dma_device       dma;
+       struct dw_axi_dma_hcfg  *hdata;
+       struct dma_pool         *desc_pool;     /* 64-byte-aligned LLI descriptors */
+
+       /* channels */
+       struct axi_dma_chan     *chan;
+};
+
+/* Top-level per-device state: registers, IRQ, clocks, driver data */
+struct axi_dma_chip {
+       struct device           *dev;
+       int                     irq;
+       void __iomem            *regs;
+       struct clk              *core_clk;
+       struct clk              *cfgr_clk;
+       struct dw_axi_dma       *dw;
+};
+
+/* LLI == Linked List Item */
+/* Hardware-defined descriptor layout fetched by the DMAC: packed,
+ * little-endian fields; must start 64-byte aligned (dma_pool enforces). */
+struct __packed axi_dma_lli {
+       __le64          sar;
+       __le64          dar;
+       __le32          block_ts_lo;
+       __le32          block_ts_hi;
+       __le64          llp;
+       __le32          ctl_lo;
+       __le32          ctl_hi;
+       __le32          sstat;
+       __le32          dstat;
+       __le32          status_lo;
+       __le32          ststus_hi;      /* NOTE(review): likely a typo for status_hi */
+       __le32          reserved_lo;
+       __le32          reserved_hi;
+};
+
+/* Driver descriptor: the hardware LLI must stay first so vd.tx.phys
+ * (the pool allocation address) is also the LLI's bus address. */
+struct axi_dma_desc {
+       struct axi_dma_lli              lli;
+
+       struct virt_dma_desc            vd;
+       struct axi_dma_chan             *chan;
+       struct list_head                xfer_list;      /* sibling LLIs of one transfer */
+};
+
+/* Logging helper: struct device of a generic dma_chan */
+static inline struct device *dchan2dev(struct dma_chan *dchan)
+{
+       return &dchan->dev->device;
+}
+
+/* Logging helper: struct device of a driver channel */
+static inline struct device *chan2dev(struct axi_dma_chan *chan)
+{
+       return &chan->vc.chan.dev->device;
+}
+
+/* Container-of helper: virt_dma_desc -> driver descriptor */
+static inline struct axi_dma_desc *vd_to_axi_desc(struct virt_dma_desc *vd)
+{
+       return container_of(vd, struct axi_dma_desc, vd);
+}
+
+/* Container-of helper: virt_dma_chan -> driver channel */
+static inline struct axi_dma_chan *vc_to_axi_dma_chan(struct virt_dma_chan *vc)
+{
+       return container_of(vc, struct axi_dma_chan, vc);
+}
+
+/* Container-of helper: generic dma_chan -> driver channel */
+static inline struct axi_dma_chan *dchan_to_axi_dma_chan(struct dma_chan *dchan)
+{
+       return vc_to_axi_dma_chan(to_virt_chan(dchan));
+}
+
+
+#define COMMON_REG_LEN         0x100
+#define CHAN_REG_LEN           0x100
+
+/* Common registers offset */
+#define DMAC_ID                        0x000 /* R DMAC ID */
+#define DMAC_COMPVER           0x008 /* R DMAC Component Version */
+#define DMAC_CFG               0x010 /* R/W DMAC Configuration */
+#define DMAC_CHEN              0x018 /* R/W DMAC Channel Enable */
+#define DMAC_CHEN_L            0x018 /* R/W DMAC Channel Enable 00-31 */
+#define DMAC_CHEN_H            0x01C /* R/W DMAC Channel Enable 32-63 */
+#define DMAC_INTSTATUS         0x030 /* R DMAC Interrupt Status */
+#define DMAC_COMMON_INTCLEAR   0x038 /* W DMAC Interrupt Clear */
+#define DMAC_COMMON_INTSTATUS_ENA 0x040 /* R DMAC Interrupt Status Enable */
+#define DMAC_COMMON_INTSIGNAL_ENA 0x048 /* R/W DMAC Interrupt Signal Enable */
+#define DMAC_COMMON_INTSTATUS  0x050 /* R DMAC Interrupt Status */
+#define DMAC_RESET             0x058 /* R DMAC Reset Register1 */
+
+/* DMA channel registers offset */
+#define CH_SAR                 0x000 /* R/W Chan Source Address */
+#define CH_DAR                 0x008 /* R/W Chan Destination Address */
+#define CH_BLOCK_TS            0x010 /* R/W Chan Block Transfer Size */
+#define CH_CTL                 0x018 /* R/W Chan Control */
+#define CH_CTL_L               0x018 /* R/W Chan Control 00-31 */
+#define CH_CTL_H               0x01C /* R/W Chan Control 32-63 */
+#define CH_CFG                 0x020 /* R/W Chan Configuration */
+#define CH_CFG_L               0x020 /* R/W Chan Configuration 00-31 */
+#define CH_CFG_H               0x024 /* R/W Chan Configuration 32-63 */
+#define CH_LLP                 0x028 /* R/W Chan Linked List Pointer */
+#define CH_STATUS              0x030 /* R Chan Status */
+#define CH_SWHSSRC             0x038 /* R/W Chan SW Handshake Source */
+#define CH_SWHSDST             0x040 /* R/W Chan SW Handshake Destination */
+#define CH_BLK_TFR_RESUMEREQ   0x048 /* W Chan Block Transfer Resume Req */
+#define CH_AXI_ID              0x050 /* R/W Chan AXI ID */
+#define CH_AXI_QOS             0x058 /* R/W Chan AXI QOS */
+#define CH_SSTAT               0x060 /* R Chan Source Status */
+#define CH_DSTAT               0x068 /* R Chan Destination Status */
+#define CH_SSTATAR             0x070 /* R/W Chan Source Status Fetch Addr */
+#define CH_DSTATAR             0x078 /* R/W Chan Destination Status Fetch Addr */
+#define CH_INTSTATUS_ENA       0x080 /* R/W Chan Interrupt Status Enable */
+#define CH_INTSTATUS           0x088 /* R/W Chan Interrupt Status */
+#define CH_INTSIGNAL_ENA       0x090 /* R/W Chan Interrupt Signal Enable */
+#define CH_INTCLEAR            0x098 /* W Chan Interrupt Clear */
+
+
+/* DMAC_CFG */
+#define DMAC_EN_POS                    0
+#define DMAC_EN_MASK                   BIT(DMAC_EN_POS)
+
+#define INT_EN_POS                     1
+#define INT_EN_MASK                    BIT(INT_EN_POS)
+
+#define DMAC_CHAN_EN_SHIFT             0
+#define DMAC_CHAN_EN_WE_SHIFT          8
+
+#define DMAC_CHAN_SUSP_SHIFT           16
+#define DMAC_CHAN_SUSP_WE_SHIFT                24
+
+/* CH_CTL_H */
+#define CH_CTL_H_ARLEN_EN              BIT(6)
+#define CH_CTL_H_ARLEN_POS             7
+#define CH_CTL_H_AWLEN_EN              BIT(15)
+#define CH_CTL_H_AWLEN_POS             16
+
+enum {
+       DWAXIDMAC_ARWLEN_1              = 0,
+       DWAXIDMAC_ARWLEN_2              = 1,
+       DWAXIDMAC_ARWLEN_4              = 3,
+       DWAXIDMAC_ARWLEN_8              = 7,
+       DWAXIDMAC_ARWLEN_16             = 15,
+       DWAXIDMAC_ARWLEN_32             = 31,
+       DWAXIDMAC_ARWLEN_64             = 63,
+       DWAXIDMAC_ARWLEN_128            = 127,
+       DWAXIDMAC_ARWLEN_256            = 255,
+       DWAXIDMAC_ARWLEN_MIN            = DWAXIDMAC_ARWLEN_1,
+       DWAXIDMAC_ARWLEN_MAX            = DWAXIDMAC_ARWLEN_256
+};
+
+#define CH_CTL_H_LLI_LAST              BIT(30)
+#define CH_CTL_H_LLI_VALID             BIT(31)
+
+/* CH_CTL_L */
+#define CH_CTL_L_LAST_WRITE_EN         BIT(30)
+
+#define CH_CTL_L_DST_MSIZE_POS         18
+#define CH_CTL_L_SRC_MSIZE_POS         14
+
+enum {
+       DWAXIDMAC_BURST_TRANS_LEN_1     = 0,
+       DWAXIDMAC_BURST_TRANS_LEN_4,
+       DWAXIDMAC_BURST_TRANS_LEN_8,
+       DWAXIDMAC_BURST_TRANS_LEN_16,
+       DWAXIDMAC_BURST_TRANS_LEN_32,
+       DWAXIDMAC_BURST_TRANS_LEN_64,
+       DWAXIDMAC_BURST_TRANS_LEN_128,
+       DWAXIDMAC_BURST_TRANS_LEN_256,
+       DWAXIDMAC_BURST_TRANS_LEN_512,
+       DWAXIDMAC_BURST_TRANS_LEN_1024
+};
+
+#define CH_CTL_L_DST_WIDTH_POS         11
+#define CH_CTL_L_SRC_WIDTH_POS         8
+
+#define CH_CTL_L_DST_INC_POS           6
+#define CH_CTL_L_SRC_INC_POS           4
+enum {
+       DWAXIDMAC_CH_CTL_L_INC          = 0,
+       DWAXIDMAC_CH_CTL_L_NOINC
+};
+
+#define CH_CTL_L_DST_MAST              BIT(2)
+#define CH_CTL_L_SRC_MAST              BIT(0)
+
+/* CH_CFG_H */
+#define CH_CFG_H_PRIORITY_POS          17
+#define CH_CFG_H_HS_SEL_DST_POS                4
+#define CH_CFG_H_HS_SEL_SRC_POS                3
+enum {
+       DWAXIDMAC_HS_SEL_HW             = 0,
+       DWAXIDMAC_HS_SEL_SW
+};
+
+#define CH_CFG_H_TT_FC_POS             0
+enum {
+       DWAXIDMAC_TT_FC_MEM_TO_MEM_DMAC = 0,
+       DWAXIDMAC_TT_FC_MEM_TO_PER_DMAC,
+       DWAXIDMAC_TT_FC_PER_TO_MEM_DMAC,
+       DWAXIDMAC_TT_FC_PER_TO_PER_DMAC,
+       DWAXIDMAC_TT_FC_PER_TO_MEM_SRC,
+       DWAXIDMAC_TT_FC_PER_TO_PER_SRC,
+       DWAXIDMAC_TT_FC_MEM_TO_PER_DST,
+       DWAXIDMAC_TT_FC_PER_TO_PER_DST
+};
+
+/* CH_CFG_L */
+#define CH_CFG_L_DST_MULTBLK_TYPE_POS  2
+#define CH_CFG_L_SRC_MULTBLK_TYPE_POS  0
+enum {
+       DWAXIDMAC_MBLK_TYPE_CONTIGUOUS  = 0,
+       DWAXIDMAC_MBLK_TYPE_RELOAD,
+       DWAXIDMAC_MBLK_TYPE_SHADOW_REG,
+       DWAXIDMAC_MBLK_TYPE_LL
+};
+
+/**
+ * DW AXI DMA channel interrupts
+ *
+ * @DWAXIDMAC_IRQ_NONE: Bitmask of no one interrupt
+ * @DWAXIDMAC_IRQ_BLOCK_TRF: Block transfer complete
+ * @DWAXIDMAC_IRQ_DMA_TRF: Dma transfer complete
+ * @DWAXIDMAC_IRQ_SRC_TRAN: Source transaction complete
+ * @DWAXIDMAC_IRQ_DST_TRAN: Destination transaction complete
+ * @DWAXIDMAC_IRQ_SRC_DEC_ERR: Source decode error
+ * @DWAXIDMAC_IRQ_DST_DEC_ERR: Destination decode error
+ * @DWAXIDMAC_IRQ_SRC_SLV_ERR: Source slave error
+ * @DWAXIDMAC_IRQ_DST_SLV_ERR: Destination slave error
+ * @DWAXIDMAC_IRQ_LLI_RD_DEC_ERR: LLI read decode error
+ * @DWAXIDMAC_IRQ_LLI_WR_DEC_ERR: LLI write decode error
+ * @DWAXIDMAC_IRQ_LLI_RD_SLV_ERR: LLI read slave error
+ * @DWAXIDMAC_IRQ_LLI_WR_SLV_ERR: LLI write slave error
+ * @DWAXIDMAC_IRQ_INVALID_ERR: LLI invalid error or Shadow register error
+ * @DWAXIDMAC_IRQ_MULTIBLKTYPE_ERR: Slave Interface Multiblock type error
+ * @DWAXIDMAC_IRQ_DEC_ERR: Slave Interface decode error
+ * @DWAXIDMAC_IRQ_WR2RO_ERR: Slave Interface write to read only error
+ * @DWAXIDMAC_IRQ_RD2RWO_ERR: Slave Interface read to write only error
+ * @DWAXIDMAC_IRQ_WRONCHEN_ERR: Slave Interface write to channel error
+ * @DWAXIDMAC_IRQ_SHADOWREG_ERR: Slave Interface shadow reg error
+ * @DWAXIDMAC_IRQ_WRONHOLD_ERR: Slave Interface hold error
+ * @DWAXIDMAC_IRQ_LOCK_CLEARED: Lock Cleared Status
+ * @DWAXIDMAC_IRQ_SRC_SUSPENDED: Source Suspended Status
+ * @DWAXIDMAC_IRQ_SUSPENDED: Channel Suspended Status
+ * @DWAXIDMAC_IRQ_DISABLED: Channel Disabled Status
+ * @DWAXIDMAC_IRQ_ABORTED: Channel Aborted Status
+ * @DWAXIDMAC_IRQ_ALL_ERR: Bitmask of all error interrupts
+ * @DWAXIDMAC_IRQ_ALL: Bitmask of all interrupts
+ */
+enum {
+       DWAXIDMAC_IRQ_NONE              = 0,
+       DWAXIDMAC_IRQ_BLOCK_TRF         = BIT(0),
+       DWAXIDMAC_IRQ_DMA_TRF           = BIT(1),
+       DWAXIDMAC_IRQ_SRC_TRAN          = BIT(3),
+       DWAXIDMAC_IRQ_DST_TRAN          = BIT(4),
+       DWAXIDMAC_IRQ_SRC_DEC_ERR       = BIT(5),
+       DWAXIDMAC_IRQ_DST_DEC_ERR       = BIT(6),
+       DWAXIDMAC_IRQ_SRC_SLV_ERR       = BIT(7),
+       DWAXIDMAC_IRQ_DST_SLV_ERR       = BIT(8),
+       DWAXIDMAC_IRQ_LLI_RD_DEC_ERR    = BIT(9),
+       DWAXIDMAC_IRQ_LLI_WR_DEC_ERR    = BIT(10),
+       DWAXIDMAC_IRQ_LLI_RD_SLV_ERR    = BIT(11),
+       DWAXIDMAC_IRQ_LLI_WR_SLV_ERR    = BIT(12),
+       DWAXIDMAC_IRQ_INVALID_ERR       = BIT(13),
+       DWAXIDMAC_IRQ_MULTIBLKTYPE_ERR  = BIT(14),
+       DWAXIDMAC_IRQ_DEC_ERR           = BIT(16),
+       DWAXIDMAC_IRQ_WR2RO_ERR         = BIT(17),
+       DWAXIDMAC_IRQ_RD2RWO_ERR        = BIT(18),
+       DWAXIDMAC_IRQ_WRONCHEN_ERR      = BIT(19),
+       DWAXIDMAC_IRQ_SHADOWREG_ERR     = BIT(20),
+       DWAXIDMAC_IRQ_WRONHOLD_ERR      = BIT(21),
+       DWAXIDMAC_IRQ_LOCK_CLEARED      = BIT(27),
+       DWAXIDMAC_IRQ_SRC_SUSPENDED     = BIT(28),
+       DWAXIDMAC_IRQ_SUSPENDED         = BIT(29),
+       DWAXIDMAC_IRQ_DISABLED          = BIT(30),
+       DWAXIDMAC_IRQ_ABORTED           = BIT(31),
+       DWAXIDMAC_IRQ_ALL_ERR           = (GENMASK(21, 16) | GENMASK(14, 5)),
+       DWAXIDMAC_IRQ_ALL               = GENMASK(31, 0)
+};
+
+enum {
+       DWAXIDMAC_TRANS_WIDTH_8         = 0,
+       DWAXIDMAC_TRANS_WIDTH_16,
+       DWAXIDMAC_TRANS_WIDTH_32,
+       DWAXIDMAC_TRANS_WIDTH_64,
+       DWAXIDMAC_TRANS_WIDTH_128,
+       DWAXIDMAC_TRANS_WIDTH_256,
+       DWAXIDMAC_TRANS_WIDTH_512,
+       DWAXIDMAC_TRANS_WIDTH_MAX       = DWAXIDMAC_TRANS_WIDTH_512
+};
+
+#endif /* _AXI_DMA_PLATFORM_H */
index 948df1ab5f1a26bfa564f55db06b7e352d4152bb..85ea92fcea5400e7fb5676f8c79c772e3c89b907 100644 (file)
@@ -1876,6 +1876,11 @@ static void edma_dma_init(struct edma_cc *ecc, bool legacy_mode)
 
        if (memcpy_channels) {
                m_ddev = devm_kzalloc(ecc->dev, sizeof(*m_ddev), GFP_KERNEL);
+               if (!m_ddev) {
+                       dev_warn(ecc->dev, "memcpy is disabled due to OoM\n");
+                       memcpy_channels = NULL;
+                       goto ch_setup;
+               }
                ecc->dma_memcpy = m_ddev;
 
                dma_cap_zero(m_ddev->cap_mask);
@@ -1903,6 +1908,7 @@ static void edma_dma_init(struct edma_cc *ecc, bool legacy_mode)
                dev_info(ecc->dev, "memcpy is disabled\n");
        }
 
+ch_setup:
        for (i = 0; i < ecc->num_channels; i++) {
                struct edma_chan *echan = &ecc->slave_chans[i];
                echan->ch_num = EDMA_CTLR_CHAN(ecc->id, i);
index e7db24c67030d19a0db9c88d46ae867d11e4d0dd..ccd03c3cedfeda7c3508677e85017178d8ab2f63 100644 (file)
@@ -338,6 +338,7 @@ struct sdma_channel {
        unsigned int                    chn_real_count;
        struct tasklet_struct           tasklet;
        struct imx_dma_data             data;
+       bool                            enabled;
 };
 
 #define IMX_DMA_SG_LOOP                BIT(0)
@@ -596,7 +597,14 @@ static int sdma_config_ownership(struct sdma_channel *sdmac,
 
 static void sdma_enable_channel(struct sdma_engine *sdma, int channel)
 {
+       unsigned long flags;
+       struct sdma_channel *sdmac = &sdma->channel[channel];
+
        writel(BIT(channel), sdma->regs + SDMA_H_START);
+
+       spin_lock_irqsave(&sdmac->lock, flags);
+       sdmac->enabled = true;
+       spin_unlock_irqrestore(&sdmac->lock, flags);
 }
 
 /*
@@ -685,6 +693,14 @@ static void sdma_update_channel_loop(struct sdma_channel *sdmac)
        struct sdma_buffer_descriptor *bd;
        int error = 0;
        enum dma_status old_status = sdmac->status;
+       unsigned long flags;
+
+       spin_lock_irqsave(&sdmac->lock, flags);
+       if (!sdmac->enabled) {
+               spin_unlock_irqrestore(&sdmac->lock, flags);
+               return;
+       }
+       spin_unlock_irqrestore(&sdmac->lock, flags);
 
        /*
         * loop mode. Iterate over descriptors, re-setup them and
@@ -938,10 +954,15 @@ static int sdma_disable_channel(struct dma_chan *chan)
        struct sdma_channel *sdmac = to_sdma_chan(chan);
        struct sdma_engine *sdma = sdmac->sdma;
        int channel = sdmac->channel;
+       unsigned long flags;
 
        writel_relaxed(BIT(channel), sdma->regs + SDMA_H_STATSTOP);
        sdmac->status = DMA_ERROR;
 
+       spin_lock_irqsave(&sdmac->lock, flags);
+       sdmac->enabled = false;
+       spin_unlock_irqrestore(&sdmac->lock, flags);
+
        return 0;
 }
 
diff --git a/drivers/dma/mediatek/Kconfig b/drivers/dma/mediatek/Kconfig
new file mode 100644 (file)
index 0000000..27bac0b
--- /dev/null
@@ -0,0 +1,13 @@
+
+config MTK_HSDMA
+       tristate "MediaTek High-Speed DMA controller support"
+       depends on ARCH_MEDIATEK || COMPILE_TEST
+       select DMA_ENGINE
+       select DMA_VIRTUAL_CHANNELS
+       ---help---
+         Enable support for High-Speed DMA controller on MediaTek
+         SoCs.
+
+         This controller provides the channels which is dedicated to
+         memory-to-memory transfer to offload from CPU through ring-
+         based descriptor management.
diff --git a/drivers/dma/mediatek/Makefile b/drivers/dma/mediatek/Makefile
new file mode 100644 (file)
index 0000000..6e778f8
--- /dev/null
@@ -0,0 +1 @@
+obj-$(CONFIG_MTK_HSDMA) += mtk-hsdma.o
diff --git a/drivers/dma/mediatek/mtk-hsdma.c b/drivers/dma/mediatek/mtk-hsdma.c
new file mode 100644 (file)
index 0000000..b7ec56a
--- /dev/null
@@ -0,0 +1,1056 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018 MediaTek Inc.
+
+/*
+ * Driver for MediaTek High-Speed DMA Controller
+ *
+ * Author: Sean Wang <sean.wang@mediatek.com>
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/iopoll.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/refcount.h>
+#include <linux/slab.h>
+
+#include "../virt-dma.h"
+
+#define MTK_HSDMA_USEC_POLL            20
+#define MTK_HSDMA_TIMEOUT_POLL         200000
+#define MTK_HSDMA_DMA_BUSWIDTHS                BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)
+
+/* The default number of virtual channel */
+#define MTK_HSDMA_NR_VCHANS            3
+
+/* Only one physical channel supported */
+#define MTK_HSDMA_NR_MAX_PCHANS                1
+
+/* Macro for physical descriptor (PD) manipulation */
+/* The number of PD which must be 2 of power */
+#define MTK_DMA_SIZE                   64
+#define MTK_HSDMA_NEXT_DESP_IDX(x, y)  (((x) + 1) & ((y) - 1))
+#define MTK_HSDMA_LAST_DESP_IDX(x, y)  (((x) - 1) & ((y) - 1))
+#define MTK_HSDMA_MAX_LEN              0x3f80
+#define MTK_HSDMA_ALIGN_SIZE           4
+#define MTK_HSDMA_PLEN_MASK            0x3fff
+#define MTK_HSDMA_DESC_PLEN(x)         (((x) & MTK_HSDMA_PLEN_MASK) << 16)
+#define MTK_HSDMA_DESC_PLEN_GET(x)     (((x) >> 16) & MTK_HSDMA_PLEN_MASK)
+
+/* Registers for underlying ring manipulation */
+#define MTK_HSDMA_TX_BASE              0x0
+#define MTK_HSDMA_TX_CNT               0x4
+#define MTK_HSDMA_TX_CPU               0x8
+#define MTK_HSDMA_TX_DMA               0xc
+#define MTK_HSDMA_RX_BASE              0x100
+#define MTK_HSDMA_RX_CNT               0x104
+#define MTK_HSDMA_RX_CPU               0x108
+#define MTK_HSDMA_RX_DMA               0x10c
+
+/* Registers for global setup */
+#define MTK_HSDMA_GLO                  0x204
+#define MTK_HSDMA_GLO_MULTI_DMA                BIT(10)
+#define MTK_HSDMA_TX_WB_DDONE          BIT(6)
+#define MTK_HSDMA_BURST_64BYTES                (0x2 << 4)
+#define MTK_HSDMA_GLO_RX_BUSY          BIT(3)
+#define MTK_HSDMA_GLO_RX_DMA           BIT(2)
+#define MTK_HSDMA_GLO_TX_BUSY          BIT(1)
+#define MTK_HSDMA_GLO_TX_DMA           BIT(0)
+#define MTK_HSDMA_GLO_DMA              (MTK_HSDMA_GLO_TX_DMA | \
+                                        MTK_HSDMA_GLO_RX_DMA)
+#define MTK_HSDMA_GLO_BUSY             (MTK_HSDMA_GLO_RX_BUSY | \
+                                        MTK_HSDMA_GLO_TX_BUSY)
+#define MTK_HSDMA_GLO_DEFAULT          (MTK_HSDMA_GLO_TX_DMA | \
+                                        MTK_HSDMA_GLO_RX_DMA | \
+                                        MTK_HSDMA_TX_WB_DDONE | \
+                                        MTK_HSDMA_BURST_64BYTES | \
+                                        MTK_HSDMA_GLO_MULTI_DMA)
+
+/* Registers for reset */
+#define MTK_HSDMA_RESET                        0x208
+#define MTK_HSDMA_RST_TX               BIT(0)
+#define MTK_HSDMA_RST_RX               BIT(16)
+
+/* Registers for interrupt control */
+#define MTK_HSDMA_DLYINT               0x20c
+#define MTK_HSDMA_RXDLY_INT_EN         BIT(15)
+
+/* Interrupt fires when the pending number's more than the specified */
+#define MTK_HSDMA_RXMAX_PINT(x)                (((x) & 0x7f) << 8)
+
+/* Interrupt fires when the pending time's more than the specified in 20 us */
+#define MTK_HSDMA_RXMAX_PTIME(x)       ((x) & 0x7f)
+#define MTK_HSDMA_DLYINT_DEFAULT       (MTK_HSDMA_RXDLY_INT_EN | \
+                                        MTK_HSDMA_RXMAX_PINT(20) | \
+                                        MTK_HSDMA_RXMAX_PTIME(20))
+#define MTK_HSDMA_INT_STATUS           0x220
+#define MTK_HSDMA_INT_ENABLE           0x228
+#define MTK_HSDMA_INT_RXDONE           BIT(16)
+
+enum mtk_hsdma_vdesc_flag {
+       MTK_HSDMA_VDESC_FINISHED        = 0x01,
+};
+
+#define IS_MTK_HSDMA_VDESC_FINISHED(x) ((x) == MTK_HSDMA_VDESC_FINISHED)
+
+/**
+ * struct mtk_hsdma_pdesc - This is the struct holding info describing physical
+ *                         descriptor (PD) and its placement must be kept at
+ *                         4-bytes alignment in little endian order.
+ * @desc[1-4]:             The control pad used to indicate hardware how to
+ *                         deal with the descriptor such as source and
+ *                         destination address and data length. The maximum
+ *                         data length each pdesc can handle is 0x3f80 bytes
+ */
+struct mtk_hsdma_pdesc {
+       __le32 desc1;
+       __le32 desc2;
+       __le32 desc3;
+       __le32 desc4;
+} __packed __aligned(4);
+
+/**
+ * struct mtk_hsdma_vdesc - This is the struct holding info describing virtual
+ *                         descriptor (VD)
+ * @vd:                            An instance for struct virt_dma_desc
+ * @len:                   The total data size device wants to move
+ * @residue:               The remaining data size device will move
+ * @dest:                  The destination address device wants to move to
+ * @src:                   The source address device wants to move from
+ */
+struct mtk_hsdma_vdesc {
+       struct virt_dma_desc vd;
+       size_t len;
+       size_t residue;
+       dma_addr_t dest;
+       dma_addr_t src;
+};
+
+/**
+ * struct mtk_hsdma_cb - This is the struct holding extra info required for RX
+ *                      ring to know what relevant VD the the PD is being
+ *                      mapped to.
+ * @vd:                         Pointer to the relevant VD.
+ * @flag:               Flag indicating what action should be taken when VD
+ *                      is completed.
+ */
+struct mtk_hsdma_cb {
+       struct virt_dma_desc *vd;
+       enum mtk_hsdma_vdesc_flag flag;
+};
+
+/**
+ * struct mtk_hsdma_ring - This struct holds info describing underlying ring
+ *                        space
+ * @txd:                  The descriptor TX ring which describes DMA source
+ *                        information
+ * @rxd:                  The descriptor RX ring which describes DMA
+ *                        destination information
+ * @cb:                           The extra information pointed at by RX ring
+ * @tphys:                The physical addr of TX ring
+ * @rphys:                The physical addr of RX ring
+ * @cur_tptr:             Pointer to the next free descriptor used by the host
+ * @cur_rptr:             Pointer to the last done descriptor by the device
+ */
+struct mtk_hsdma_ring {
+       struct mtk_hsdma_pdesc *txd;
+       struct mtk_hsdma_pdesc *rxd;
+       struct mtk_hsdma_cb *cb;
+       dma_addr_t tphys;
+       dma_addr_t rphys;
+       u16 cur_tptr;
+       u16 cur_rptr;
+};
+
+/**
+ * struct mtk_hsdma_pchan - This is the struct holding info describing physical
+ *                        channel (PC)
+ * @ring:                 An instance for the underlying ring
+ * @sz_ring:              Total size allocated for the ring
+ * @nr_free:              Total number of free rooms in the ring. It would
+ *                        be accessed and updated frequently between IRQ
+ *                        context and user context to reflect whether ring
+ *                        can accept requests from VD.
+ */
+struct mtk_hsdma_pchan {
+       struct mtk_hsdma_ring ring;
+       size_t sz_ring;
+       atomic_t nr_free;
+};
+
+/**
+ * struct mtk_hsdma_vchan - This is the struct holding info describing virtual
+ *                        channel (VC)
+ * @vc:                           An instance for struct virt_dma_chan
+ * @issue_completion:     The wait for all issued descriptors completited
+ * @issue_synchronize:    Bool indicating channel synchronization starts
+ * @desc_hw_processing:           List those descriptors the hardware is processing,
+ *                        which is protected by vc.lock
+ */
+struct mtk_hsdma_vchan {
+       struct virt_dma_chan vc;
+       struct completion issue_completion;
+       bool issue_synchronize;
+       struct list_head desc_hw_processing;
+};
+
+/**
+ * struct mtk_hsdma_soc - This is the struct holding differences among SoCs
+ * @ddone:               Bit mask for DDONE
+ * @ls0:                 Bit mask for LS0
+ */
+struct mtk_hsdma_soc {
+       __le32 ddone;
+       __le32 ls0;
+};
+
+/**
+ * struct mtk_hsdma_device - This is the struct holding info describing HSDMA
+ *                          device
+ * @ddev:                   An instance for struct dma_device
+ * @base:                   The mapped register I/O base
+ * @clk:                    The clock that device internal is using
+ * @irq:                    The IRQ that device are using
+ * @dma_requests:           The number of VCs the device supports to
+ * @vc:                             The pointer to all available VCs
+ * @pc:                             The pointer to the underlying PC
+ * @pc_refcnt:              Track how many VCs are using the PC
+ * @lock:                   Lock protect agaisting multiple VCs access PC
+ * @soc:                    The pointer to area holding differences among
+ *                          vaious platform
+ */
+struct mtk_hsdma_device {
+       struct dma_device ddev;
+       void __iomem *base;
+       struct clk *clk;
+       u32 irq;
+
+       u32 dma_requests;
+       struct mtk_hsdma_vchan *vc;
+       struct mtk_hsdma_pchan *pc;
+       refcount_t pc_refcnt;
+
+       /* Lock used to protect against multiple VCs access PC */
+       spinlock_t lock;
+
+       const struct mtk_hsdma_soc *soc;
+};
+
+static struct mtk_hsdma_device *to_hsdma_dev(struct dma_chan *chan)
+{
+       return container_of(chan->device, struct mtk_hsdma_device, ddev);
+}
+
+static inline struct mtk_hsdma_vchan *to_hsdma_vchan(struct dma_chan *chan)
+{
+       return container_of(chan, struct mtk_hsdma_vchan, vc.chan);
+}
+
+static struct mtk_hsdma_vdesc *to_hsdma_vdesc(struct virt_dma_desc *vd)
+{
+       return container_of(vd, struct mtk_hsdma_vdesc, vd);
+}
+
+static struct device *hsdma2dev(struct mtk_hsdma_device *hsdma)
+{
+       return hsdma->ddev.dev;
+}
+
+static u32 mtk_dma_read(struct mtk_hsdma_device *hsdma, u32 reg)
+{
+       return readl(hsdma->base + reg);
+}
+
+static void mtk_dma_write(struct mtk_hsdma_device *hsdma, u32 reg, u32 val)
+{
+       writel(val, hsdma->base + reg);
+}
+
+static void mtk_dma_rmw(struct mtk_hsdma_device *hsdma, u32 reg,
+                       u32 mask, u32 set)
+{
+       u32 val;
+
+       val = mtk_dma_read(hsdma, reg);
+       val &= ~mask;
+       val |= set;
+       mtk_dma_write(hsdma, reg, val);
+}
+
+static void mtk_dma_set(struct mtk_hsdma_device *hsdma, u32 reg, u32 val)
+{
+       mtk_dma_rmw(hsdma, reg, 0, val);
+}
+
+static void mtk_dma_clr(struct mtk_hsdma_device *hsdma, u32 reg, u32 val)
+{
+       mtk_dma_rmw(hsdma, reg, val, 0);
+}
+
+static void mtk_hsdma_vdesc_free(struct virt_dma_desc *vd)
+{
+       kfree(container_of(vd, struct mtk_hsdma_vdesc, vd));
+}
+
+static int mtk_hsdma_busy_wait(struct mtk_hsdma_device *hsdma)
+{
+       u32 status = 0;
+
+       return readl_poll_timeout(hsdma->base + MTK_HSDMA_GLO, status,
+                                 !(status & MTK_HSDMA_GLO_BUSY),
+                                 MTK_HSDMA_USEC_POLL,
+                                 MTK_HSDMA_TIMEOUT_POLL);
+}
+
+static int mtk_hsdma_alloc_pchan(struct mtk_hsdma_device *hsdma,
+                                struct mtk_hsdma_pchan *pc)
+{
+       struct mtk_hsdma_ring *ring = &pc->ring;
+       int err;
+
+       memset(pc, 0, sizeof(*pc));
+
+       /*
+        * Allocate ring space where [0 ... MTK_DMA_SIZE - 1] is for TX ring
+        * and [MTK_DMA_SIZE ... 2 * MTK_DMA_SIZE - 1] is for RX ring.
+        */
+       pc->sz_ring = 2 * MTK_DMA_SIZE * sizeof(*ring->txd);
+       ring->txd = dma_zalloc_coherent(hsdma2dev(hsdma), pc->sz_ring,
+                                       &ring->tphys, GFP_NOWAIT);
+       if (!ring->txd)
+               return -ENOMEM;
+
+       ring->rxd = &ring->txd[MTK_DMA_SIZE];
+       ring->rphys = ring->tphys + MTK_DMA_SIZE * sizeof(*ring->txd);
+       ring->cur_tptr = 0;
+       ring->cur_rptr = MTK_DMA_SIZE - 1;
+
+       ring->cb = kcalloc(MTK_DMA_SIZE, sizeof(*ring->cb), GFP_NOWAIT);
+       if (!ring->cb) {
+               err = -ENOMEM;
+               goto err_free_dma;
+       }
+
+       atomic_set(&pc->nr_free, MTK_DMA_SIZE - 1);
+
+       /* Disable HSDMA and wait for the completion */
+       mtk_dma_clr(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA);
+       err = mtk_hsdma_busy_wait(hsdma);
+       if (err)
+               goto err_free_cb;
+
+       /* Reset */
+       mtk_dma_set(hsdma, MTK_HSDMA_RESET,
+                   MTK_HSDMA_RST_TX | MTK_HSDMA_RST_RX);
+       mtk_dma_clr(hsdma, MTK_HSDMA_RESET,
+                   MTK_HSDMA_RST_TX | MTK_HSDMA_RST_RX);
+
+       /* Setup HSDMA initial pointer in the ring */
+       mtk_dma_write(hsdma, MTK_HSDMA_TX_BASE, ring->tphys);
+       mtk_dma_write(hsdma, MTK_HSDMA_TX_CNT, MTK_DMA_SIZE);
+       mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, ring->cur_tptr);
+       mtk_dma_write(hsdma, MTK_HSDMA_TX_DMA, 0);
+       mtk_dma_write(hsdma, MTK_HSDMA_RX_BASE, ring->rphys);
+       mtk_dma_write(hsdma, MTK_HSDMA_RX_CNT, MTK_DMA_SIZE);
+       mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, ring->cur_rptr);
+       mtk_dma_write(hsdma, MTK_HSDMA_RX_DMA, 0);
+
+       /* Enable HSDMA */
+       mtk_dma_set(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA);
+
+       /* Setup delayed interrupt */
+       mtk_dma_write(hsdma, MTK_HSDMA_DLYINT, MTK_HSDMA_DLYINT_DEFAULT);
+
+       /* Enable interrupt */
+       mtk_dma_set(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+
+       return 0;
+
+err_free_cb:
+       kfree(ring->cb);
+
+err_free_dma:
+       dma_free_coherent(hsdma2dev(hsdma),
+                         pc->sz_ring, ring->txd, ring->tphys);
+       return err;
+}
+
+static void mtk_hsdma_free_pchan(struct mtk_hsdma_device *hsdma,
+                                struct mtk_hsdma_pchan *pc)
+{
+       struct mtk_hsdma_ring *ring = &pc->ring;
+
+       /* Disable HSDMA and then wait for the completion */
+       mtk_dma_clr(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA);
+       mtk_hsdma_busy_wait(hsdma);
+
+       /* Reset pointer in the ring */
+       mtk_dma_clr(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+       mtk_dma_write(hsdma, MTK_HSDMA_TX_BASE, 0);
+       mtk_dma_write(hsdma, MTK_HSDMA_TX_CNT, 0);
+       mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, 0);
+       mtk_dma_write(hsdma, MTK_HSDMA_RX_BASE, 0);
+       mtk_dma_write(hsdma, MTK_HSDMA_RX_CNT, 0);
+       mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, MTK_DMA_SIZE - 1);
+
+       kfree(ring->cb);
+
+       dma_free_coherent(hsdma2dev(hsdma),
+                         pc->sz_ring, ring->txd, ring->tphys);
+}
+
+static int mtk_hsdma_issue_pending_vdesc(struct mtk_hsdma_device *hsdma,
+                                        struct mtk_hsdma_pchan *pc,
+                                        struct mtk_hsdma_vdesc *hvd)
+{
+       struct mtk_hsdma_ring *ring = &pc->ring;
+       struct mtk_hsdma_pdesc *txd, *rxd;
+       u16 reserved, prev, tlen, num_sgs;
+       unsigned long flags;
+
+       /* Protect against PC is accessed by multiple VCs simultaneously */
+       spin_lock_irqsave(&hsdma->lock, flags);
+
+       /*
+        * Reserve rooms, where pc->nr_free is used to track how many free
+        * rooms in the ring being updated in user and IRQ context.
+        */
+       num_sgs = DIV_ROUND_UP(hvd->len, MTK_HSDMA_MAX_LEN);
+       reserved = min_t(u16, num_sgs, atomic_read(&pc->nr_free));
+
+       if (!reserved) {
+               spin_unlock_irqrestore(&hsdma->lock, flags);
+               return -ENOSPC;
+       }
+
+       atomic_sub(reserved, &pc->nr_free);
+
+       while (reserved--) {
+               /* Limit size by PD capability for valid data moving */
+               tlen = (hvd->len > MTK_HSDMA_MAX_LEN) ?
+                      MTK_HSDMA_MAX_LEN : hvd->len;
+
+               /*
+                * Setup PDs using the remaining VD info mapped on those
+                * reserved rooms. And since RXD is shared memory between the
+                * host and the device allocated by dma_alloc_coherent call,
+                * the helper macro WRITE_ONCE can ensure the data written to
+                * RAM would really happens.
+                */
+               txd = &ring->txd[ring->cur_tptr];
+               WRITE_ONCE(txd->desc1, hvd->src);
+               WRITE_ONCE(txd->desc2,
+                          hsdma->soc->ls0 | MTK_HSDMA_DESC_PLEN(tlen));
+
+               rxd = &ring->rxd[ring->cur_tptr];
+               WRITE_ONCE(rxd->desc1, hvd->dest);
+               WRITE_ONCE(rxd->desc2, MTK_HSDMA_DESC_PLEN(tlen));
+
+               /* Associate VD, the PD belonged to */
+               ring->cb[ring->cur_tptr].vd = &hvd->vd;
+
+               /* Move forward the pointer of TX ring */
+               ring->cur_tptr = MTK_HSDMA_NEXT_DESP_IDX(ring->cur_tptr,
+                                                        MTK_DMA_SIZE);
+
+               /* Update VD with remaining data */
+               hvd->src  += tlen;
+               hvd->dest += tlen;
+               hvd->len  -= tlen;
+       }
+
+       /*
+        * Tagging flag for the last PD for VD will be responsible for
+        * completing VD.
+        */
+       if (!hvd->len) {
+               prev = MTK_HSDMA_LAST_DESP_IDX(ring->cur_tptr, MTK_DMA_SIZE);
+               ring->cb[prev].flag = MTK_HSDMA_VDESC_FINISHED;
+       }
+
+       /* Ensure all changes indeed done before we're going on */
+       wmb();
+
+       /*
+        * Updating into hardware the pointer of TX ring lets HSDMA to take
+        * action for those pending PDs.
+        */
+       mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, ring->cur_tptr);
+
+       spin_unlock_irqrestore(&hsdma->lock, flags);
+
+       return 0;
+}
+
+static void mtk_hsdma_issue_vchan_pending(struct mtk_hsdma_device *hsdma,
+                                         struct mtk_hsdma_vchan *hvc)
+{
+       struct virt_dma_desc *vd, *vd2;
+       int err;
+
+       lockdep_assert_held(&hvc->vc.lock);
+
+       list_for_each_entry_safe(vd, vd2, &hvc->vc.desc_issued, node) {
+               struct mtk_hsdma_vdesc *hvd;
+
+               hvd = to_hsdma_vdesc(vd);
+
+               /* Map VD into PC and all VCs shares a single PC */
+               err = mtk_hsdma_issue_pending_vdesc(hsdma, hsdma->pc, hvd);
+
+               /*
+                * Move VD from desc_issued to desc_hw_processing when entire
+                * VD is fit into available PDs. Otherwise, the uncompleted
+                * VDs would stay in list desc_issued and then restart the
+                * processing as soon as possible once underlying ring space
+                * got freed.
+                */
+               if (err == -ENOSPC || hvd->len > 0)
+                       break;
+
+               /*
+                * The extra list desc_hw_processing is used because
+                * hardware can't provide sufficient information allowing us
+                * to know what VDs are still working on the underlying ring.
+                * Through the additional list, it can help us to implement
+                * terminate_all, residue calculation and such thing needed
+                * to know detail descriptor status on the hardware.
+                */
+               list_move_tail(&vd->node, &hvc->desc_hw_processing);
+       }
+}
+
+static void mtk_hsdma_free_rooms_in_ring(struct mtk_hsdma_device *hsdma)
+{
+       struct mtk_hsdma_vchan *hvc;
+       struct mtk_hsdma_pdesc *rxd;
+       struct mtk_hsdma_vdesc *hvd;
+       struct mtk_hsdma_pchan *pc;
+       struct mtk_hsdma_cb *cb;
+       int i = MTK_DMA_SIZE;
+       __le32 desc2;
+       u32 status;
+       u16 next;
+
+       /* Read IRQ status */
+       status = mtk_dma_read(hsdma, MTK_HSDMA_INT_STATUS);
+       if (unlikely(!(status & MTK_HSDMA_INT_RXDONE)))
+               goto rx_done;
+
+       pc = hsdma->pc;
+
+       /*
+        * Using a fail-safe loop with iterations of up to MTK_DMA_SIZE to
+        * reclaim these finished descriptors: The most number of PDs the ISR
+        * can handle at one time shouldn't be more than MTK_DMA_SIZE so we
+        * take it as limited count instead of just using a dangerous infinite
+        * poll.
+        */
+       while (i--) {
+               next = MTK_HSDMA_NEXT_DESP_IDX(pc->ring.cur_rptr,
+                                              MTK_DMA_SIZE);
+               rxd = &pc->ring.rxd[next];
+
+               /*
+                * If MTK_HSDMA_DESC_DDONE is no specified, that means data
+                * moving for the PD is still under going.
+                */
+               desc2 = READ_ONCE(rxd->desc2);
+               if (!(desc2 & hsdma->soc->ddone))
+                       break;
+
+               cb = &pc->ring.cb[next];
+               if (unlikely(!cb->vd)) {
+                       dev_err(hsdma2dev(hsdma), "cb->vd cannot be null\n");
+                       break;
+               }
+
+               /* Update residue of VD the associated PD belonged to */
+               hvd = to_hsdma_vdesc(cb->vd);
+               hvd->residue -= MTK_HSDMA_DESC_PLEN_GET(rxd->desc2);
+
+               /* Complete VD until the relevant last PD is finished */
+               if (IS_MTK_HSDMA_VDESC_FINISHED(cb->flag)) {
+                       hvc = to_hsdma_vchan(cb->vd->tx.chan);
+
+                       spin_lock(&hvc->vc.lock);
+
+                       /* Remove VD from list desc_hw_processing */
+                       list_del(&cb->vd->node);
+
+                       /* Add VD into list desc_completed */
+                       vchan_cookie_complete(cb->vd);
+
+                       if (hvc->issue_synchronize &&
+                           list_empty(&hvc->desc_hw_processing)) {
+                               complete(&hvc->issue_completion);
+                               hvc->issue_synchronize = false;
+                       }
+                       spin_unlock(&hvc->vc.lock);
+
+                       cb->flag = 0;
+               }
+
+               cb->vd = 0;
+
+               /*
+                * Recycle the RXD with the helper WRITE_ONCE that can ensure
+                * data written into RAM would really happens.
+                */
+               WRITE_ONCE(rxd->desc1, 0);
+               WRITE_ONCE(rxd->desc2, 0);
+               pc->ring.cur_rptr = next;
+
+               /* Release rooms */
+               atomic_inc(&pc->nr_free);
+       }
+
+       /* Ensure all changes indeed done before we're going on */
+       wmb();
+
+       /* Update CPU pointer for those completed PDs */
+       mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, pc->ring.cur_rptr);
+
+       /*
+        * Acking the pending IRQ allows hardware no longer to keep the used
+        * IRQ line in certain trigger state when software has completed all
+        * the finished physical descriptors.
+        */
+       if (atomic_read(&pc->nr_free) >= MTK_DMA_SIZE - 1)
+               mtk_dma_write(hsdma, MTK_HSDMA_INT_STATUS, status);
+
+       /* ASAP handles pending VDs in all VCs after freeing some rooms */
+       for (i = 0; i < hsdma->dma_requests; i++) {
+               hvc = &hsdma->vc[i];
+               spin_lock(&hvc->vc.lock);
+               mtk_hsdma_issue_vchan_pending(hsdma, hvc);
+               spin_unlock(&hvc->vc.lock);
+       }
+
+rx_done:
+       /* All completed PDs are cleaned up, so enable interrupt again */
+       mtk_dma_set(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+}
+
+static irqreturn_t mtk_hsdma_irq(int irq, void *devid)
+{
+       struct mtk_hsdma_device *hsdma = devid;
+
+       /*
+        * Disable interrupt until all completed PDs are cleaned up in
+        * mtk_hsdma_free_rooms call.
+        */
+       mtk_dma_clr(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+
+       mtk_hsdma_free_rooms_in_ring(hsdma);
+
+       return IRQ_HANDLED;
+}
+
+static struct virt_dma_desc *mtk_hsdma_find_active_desc(struct dma_chan *c,
+                                                       dma_cookie_t cookie)
+{
+       struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c);
+       struct virt_dma_desc *vd;
+
+       list_for_each_entry(vd, &hvc->desc_hw_processing, node)
+               if (vd->tx.cookie == cookie)
+                       return vd;
+
+       list_for_each_entry(vd, &hvc->vc.desc_issued, node)
+               if (vd->tx.cookie == cookie)
+                       return vd;
+
+       return NULL;
+}
+
+static enum dma_status mtk_hsdma_tx_status(struct dma_chan *c,
+                                          dma_cookie_t cookie,
+                                          struct dma_tx_state *txstate)
+{
+       struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c);
+       struct mtk_hsdma_vdesc *hvd;
+       struct virt_dma_desc *vd;
+       enum dma_status ret;
+       unsigned long flags;
+       size_t bytes = 0;
+
+       ret = dma_cookie_status(c, cookie, txstate);
+       if (ret == DMA_COMPLETE || !txstate)
+               return ret;
+
+       spin_lock_irqsave(&hvc->vc.lock, flags);
+       vd = mtk_hsdma_find_active_desc(c, cookie);
+       spin_unlock_irqrestore(&hvc->vc.lock, flags);
+
+       if (vd) {
+               hvd = to_hsdma_vdesc(vd);
+               bytes = hvd->residue;
+       }
+
+       dma_set_residue(txstate, bytes);
+
+       return ret;
+}
+
+static void mtk_hsdma_issue_pending(struct dma_chan *c)
+{
+       struct mtk_hsdma_device *hsdma = to_hsdma_dev(c);
+       struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c);
+       unsigned long flags;
+
+       spin_lock_irqsave(&hvc->vc.lock, flags);
+
+       if (vchan_issue_pending(&hvc->vc))
+               mtk_hsdma_issue_vchan_pending(hsdma, hvc);
+
+       spin_unlock_irqrestore(&hvc->vc.lock, flags);
+}
+
+static struct dma_async_tx_descriptor *
+mtk_hsdma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dest,
+                         dma_addr_t src, size_t len, unsigned long flags)
+{
+       struct mtk_hsdma_vdesc *hvd;
+
+       hvd = kzalloc(sizeof(*hvd), GFP_NOWAIT);
+       if (!hvd)
+               return NULL;
+
+       hvd->len = len;
+       hvd->residue = len;
+       hvd->src = src;
+       hvd->dest = dest;
+
+       return vchan_tx_prep(to_virt_chan(c), &hvd->vd, flags);
+}
+
+static int mtk_hsdma_free_inactive_desc(struct dma_chan *c)
+{
+       struct virt_dma_chan *vc = to_virt_chan(c);
+       unsigned long flags;
+       LIST_HEAD(head);
+
+       spin_lock_irqsave(&vc->lock, flags);
+       list_splice_tail_init(&vc->desc_allocated, &head);
+       list_splice_tail_init(&vc->desc_submitted, &head);
+       list_splice_tail_init(&vc->desc_issued, &head);
+       spin_unlock_irqrestore(&vc->lock, flags);
+
+       /* At the point, we don't expect users put descriptor into VC again */
+       vchan_dma_desc_free_list(vc, &head);
+
+       return 0;
+}
+
+/*
+ * mtk_hsdma_free_active_desc - wait until the hardware has consumed all
+ * descriptors of this channel that are already in the ring, then free the
+ * completed descriptors.
+ */
+static void mtk_hsdma_free_active_desc(struct dma_chan *c)
+{
+       struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c);
+       bool sync_needed = false;
+
+       /*
+        * Once issue_synchronize is being set, which means once the hardware
+        * consumes all descriptors for the channel in the ring, the
+        * synchronization must be notified immediately it is completed.
+        */
+       spin_lock(&hvc->vc.lock);
+       if (!list_empty(&hvc->desc_hw_processing)) {
+               hvc->issue_synchronize = true;
+               sync_needed = true;
+       }
+       spin_unlock(&hvc->vc.lock);
+
+       if (sync_needed)
+               wait_for_completion(&hvc->issue_completion);
+       /*
+        * At this point, we expect that all remaining descriptors in the ring
+        * for the channel have been completely processed.
+        */
+       WARN_ONCE(!list_empty(&hvc->desc_hw_processing),
+                 "Desc pending still in list desc_hw_processing\n");
+
+       /* Free all descriptors in list desc_completed */
+       vchan_synchronize(&hvc->vc);
+
+       WARN_ONCE(!list_empty(&hvc->vc.desc_completed),
+                 "Desc pending still in list desc_completed\n");
+}
+
+/* dmaengine device_terminate_all callback: drop inactive and drain active descriptors */
+static int mtk_hsdma_terminate_all(struct dma_chan *c)
+{
+       /*
+        * Free pending descriptors not processed yet by hardware that have
+        * previously been submitted to the channel.
+        */
+       mtk_hsdma_free_inactive_desc(c);
+
+       /*
+        * However, the DMA engine doesn't provide any way to stop these
+        * descriptors being processed currently by hardware. The only way is
+        * to just wait until these descriptors are all processed completely
+        * through mtk_hsdma_free_active_desc call.
+        */
+       mtk_hsdma_free_active_desc(c);
+
+       return 0;
+}
+
+/*
+ * dmaengine device_alloc_chan_resources callback: lazily allocate the single
+ * shared physical channel (PC) on first use and refcount it across VCs.
+ */
+static int mtk_hsdma_alloc_chan_resources(struct dma_chan *c)
+{
+       struct mtk_hsdma_device *hsdma = to_hsdma_dev(c);
+       int err;
+
+       /*
+        * Since HSDMA has only one PC, the resource for PC is being allocated
+        * when the first VC is being created and the other VCs would run on
+        * the same PC.
+        */
+       if (!refcount_read(&hsdma->pc_refcnt)) {
+               err = mtk_hsdma_alloc_pchan(hsdma, hsdma->pc);
+               if (err)
+                       return err;
+               /*
+                * refcount_inc would complain about an increment from 0
+                * (a potential use-after-free), so the first reference must
+                * be taken with an explicit refcount_set to 1.
+                */
+               refcount_set(&hsdma->pc_refcnt, 1);
+       } else {
+               refcount_inc(&hsdma->pc_refcnt);
+       }
+
+       return 0;
+}
+
+/*
+ * dmaengine device_free_chan_resources callback: tear down the VC's
+ * descriptors and release the shared PC when the last VC goes away.
+ */
+static void mtk_hsdma_free_chan_resources(struct dma_chan *c)
+{
+       struct mtk_hsdma_device *hsdma = to_hsdma_dev(c);
+
+       /* Free all descriptors in all lists on the VC */
+       mtk_hsdma_terminate_all(c);
+
+       /* The resource for PC is not freed until all the VCs are destroyed */
+       if (!refcount_dec_and_test(&hsdma->pc_refcnt))
+               return;
+
+       mtk_hsdma_free_pchan(hsdma, hsdma->pc);
+}
+
+/*
+ * mtk_hsdma_hw_init - power up the engine (runtime PM + clock) and apply the
+ * default global configuration with all interrupts masked.
+ *
+ * Returns 0 on success or the clk_prepare_enable() error code; on failure the
+ * runtime-PM reference taken here is dropped again so the device is left in
+ * the same PM state as before the call.
+ */
+static int mtk_hsdma_hw_init(struct mtk_hsdma_device *hsdma)
+{
+       int err;
+
+       pm_runtime_enable(hsdma2dev(hsdma));
+       pm_runtime_get_sync(hsdma2dev(hsdma));
+
+       err = clk_prepare_enable(hsdma->clk);
+       if (err) {
+               /*
+                * Undo the runtime-PM bring-up; otherwise a failed probe
+                * would leak a usage count and leave runtime PM enabled
+                * (mirrors the teardown order in mtk_hsdma_hw_deinit).
+                */
+               pm_runtime_put_sync(hsdma2dev(hsdma));
+               pm_runtime_disable(hsdma2dev(hsdma));
+               return err;
+       }
+
+       /* Mask all interrupts, then program the default global config */
+       mtk_dma_write(hsdma, MTK_HSDMA_INT_ENABLE, 0);
+       mtk_dma_write(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DEFAULT);
+
+       return 0;
+}
+
+/* Quiesce the engine, then release the clock and runtime-PM references */
+static int mtk_hsdma_hw_deinit(struct mtk_hsdma_device *hsdma)
+{
+       /* Clear the global configuration to stop the engine */
+       mtk_dma_write(hsdma, MTK_HSDMA_GLO, 0);
+
+       clk_disable_unprepare(hsdma->clk);
+
+       pm_runtime_put_sync(hsdma2dev(hsdma));
+       pm_runtime_disable(hsdma2dev(hsdma));
+
+       return 0;
+}
+
+/*
+ * Per-SoC layout of the descriptor flags: bit positions of the
+ * "descriptor done" (ddone) and "last segment" (ls0) flags differ
+ * between MT7623 and MT7622.
+ */
+static const struct mtk_hsdma_soc mt7623_soc = {
+       .ddone = BIT(31),
+       .ls0 = BIT(30),
+};
+
+static const struct mtk_hsdma_soc mt7622_soc = {
+       .ddone = BIT(15),
+       .ls0 = BIT(14),
+};
+
+static const struct of_device_id mtk_hsdma_match[] = {
+       { .compatible = "mediatek,mt7623-hsdma", .data = &mt7623_soc},
+       { .compatible = "mediatek,mt7622-hsdma", .data = &mt7622_soc},
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, mtk_hsdma_match);
+
+/*
+ * mtk_hsdma_probe - map registers, look up SoC data, clock and IRQ, set up
+ * the dmaengine device and its virtual channels, then register with the
+ * dmaengine core and the OF DMA framework, bring up the hardware and hook
+ * the interrupt.
+ *
+ * Fixes over the previous version: the return value of mtk_hsdma_hw_init()
+ * is now checked, and the error path after a successful
+ * of_dma_controller_register() now calls of_dma_controller_free() (and
+ * mtk_hsdma_hw_deinit() where applicable) so nothing is leaked when
+ * devm_request_irq() fails.
+ */
+static int mtk_hsdma_probe(struct platform_device *pdev)
+{
+       struct mtk_hsdma_device *hsdma;
+       struct mtk_hsdma_vchan *vc;
+       struct dma_device *dd;
+       struct resource *res;
+       int i, err;
+
+       hsdma = devm_kzalloc(&pdev->dev, sizeof(*hsdma), GFP_KERNEL);
+       if (!hsdma)
+               return -ENOMEM;
+
+       dd = &hsdma->ddev;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       hsdma->base = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(hsdma->base))
+               return PTR_ERR(hsdma->base);
+
+       hsdma->soc = of_device_get_match_data(&pdev->dev);
+       if (!hsdma->soc) {
+               dev_err(&pdev->dev, "No device match found\n");
+               return -ENODEV;
+       }
+
+       hsdma->clk = devm_clk_get(&pdev->dev, "hsdma");
+       if (IS_ERR(hsdma->clk)) {
+               dev_err(&pdev->dev, "No clock for %s\n",
+                       dev_name(&pdev->dev));
+               return PTR_ERR(hsdma->clk);
+       }
+
+       res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+       if (!res) {
+               dev_err(&pdev->dev, "No irq resource for %s\n",
+                       dev_name(&pdev->dev));
+               return -EINVAL;
+       }
+       hsdma->irq = res->start;
+
+       refcount_set(&hsdma->pc_refcnt, 0);
+       spin_lock_init(&hsdma->lock);
+
+       dma_cap_set(DMA_MEMCPY, dd->cap_mask);
+
+       dd->copy_align = MTK_HSDMA_ALIGN_SIZE;
+       dd->device_alloc_chan_resources = mtk_hsdma_alloc_chan_resources;
+       dd->device_free_chan_resources = mtk_hsdma_free_chan_resources;
+       dd->device_tx_status = mtk_hsdma_tx_status;
+       dd->device_issue_pending = mtk_hsdma_issue_pending;
+       dd->device_prep_dma_memcpy = mtk_hsdma_prep_dma_memcpy;
+       dd->device_terminate_all = mtk_hsdma_terminate_all;
+       dd->src_addr_widths = MTK_HSDMA_DMA_BUSWIDTHS;
+       dd->dst_addr_widths = MTK_HSDMA_DMA_BUSWIDTHS;
+       dd->directions = BIT(DMA_MEM_TO_MEM);
+       dd->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+       dd->dev = &pdev->dev;
+       INIT_LIST_HEAD(&dd->channels);
+
+       /* Number of VCs: DT "dma-requests" property, or the driver default */
+       hsdma->dma_requests = MTK_HSDMA_NR_VCHANS;
+       if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node,
+                                                     "dma-requests",
+                                                     &hsdma->dma_requests)) {
+               dev_info(&pdev->dev,
+                        "Using %u as missing dma-requests property\n",
+                        MTK_HSDMA_NR_VCHANS);
+       }
+
+       hsdma->pc = devm_kcalloc(&pdev->dev, MTK_HSDMA_NR_MAX_PCHANS,
+                                sizeof(*hsdma->pc), GFP_KERNEL);
+       if (!hsdma->pc)
+               return -ENOMEM;
+
+       hsdma->vc = devm_kcalloc(&pdev->dev, hsdma->dma_requests,
+                                sizeof(*hsdma->vc), GFP_KERNEL);
+       if (!hsdma->vc)
+               return -ENOMEM;
+
+       for (i = 0; i < hsdma->dma_requests; i++) {
+               vc = &hsdma->vc[i];
+               vc->vc.desc_free = mtk_hsdma_vdesc_free;
+               vchan_init(&vc->vc, dd);
+               init_completion(&vc->issue_completion);
+               INIT_LIST_HEAD(&vc->desc_hw_processing);
+       }
+
+       err = dma_async_device_register(dd);
+       if (err)
+               return err;
+
+       err = of_dma_controller_register(pdev->dev.of_node,
+                                        of_dma_xlate_by_chan_id, hsdma);
+       if (err) {
+               dev_err(&pdev->dev,
+                       "MediaTek HSDMA OF registration failed %d\n", err);
+               goto err_unregister;
+       }
+
+       err = mtk_hsdma_hw_init(hsdma);
+       if (err) {
+               dev_err(&pdev->dev,
+                       "MediaTek HSDMA HW initialization failed %d\n", err);
+               goto err_free;
+       }
+
+       err = devm_request_irq(&pdev->dev, hsdma->irq,
+                              mtk_hsdma_irq, 0,
+                              dev_name(&pdev->dev), hsdma);
+       if (err) {
+               dev_err(&pdev->dev,
+                       "request_irq failed with err %d\n", err);
+               goto err_deinit;
+       }
+
+       platform_set_drvdata(pdev, hsdma);
+
+       dev_info(&pdev->dev, "MediaTek HSDMA driver registered\n");
+
+       return 0;
+
+err_deinit:
+       mtk_hsdma_hw_deinit(hsdma);
+err_free:
+       of_dma_controller_free(pdev->dev.of_node);
+err_unregister:
+       dma_async_device_unregister(dd);
+
+       return err;
+}
+
+/*
+ * mtk_hsdma_remove - tear down in reverse order of probe: stop the VC
+ * tasklets, silence and quiesce the hardware, then unregister from the
+ * dmaengine core and the OF DMA framework.
+ */
+static int mtk_hsdma_remove(struct platform_device *pdev)
+{
+       struct mtk_hsdma_device *hsdma = platform_get_drvdata(pdev);
+       struct mtk_hsdma_vchan *vc;
+       int i;
+
+       /* Kill VC task */
+       for (i = 0; i < hsdma->dma_requests; i++) {
+               vc = &hsdma->vc[i];
+
+               list_del(&vc->vc.chan.device_node);
+               tasklet_kill(&vc->vc.task);
+       }
+
+       /* Disable DMA interrupt */
+       mtk_dma_write(hsdma, MTK_HSDMA_INT_ENABLE, 0);
+
+       /* Waits for any pending IRQ handlers to complete */
+       synchronize_irq(hsdma->irq);
+
+       /* Disable hardware */
+       mtk_hsdma_hw_deinit(hsdma);
+
+       dma_async_device_unregister(&hsdma->ddev);
+       of_dma_controller_free(pdev->dev.of_node);
+
+       return 0;
+}
+
+/* Platform driver glue; matched against the OF table above */
+static struct platform_driver mtk_hsdma_driver = {
+       .probe          = mtk_hsdma_probe,
+       .remove         = mtk_hsdma_remove,
+       .driver = {
+               .name           = KBUILD_MODNAME,
+               .of_match_table = mtk_hsdma_match,
+       },
+};
+module_platform_driver(mtk_hsdma_driver);
+
+MODULE_DESCRIPTION("MediaTek High-Speed DMA Controller Driver");
+MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
+MODULE_LICENSE("GPL v2");
index d7327fd5f445667a30f2e691c321a46ad03e862c..de1fd59fe13699b5efb84500be5aff699fe1a1d9 100644 (file)
@@ -1510,7 +1510,7 @@ static void pl330_dotask(unsigned long data)
 /* Returns 1 if state was updated, 0 otherwise */
 static int pl330_update(struct pl330_dmac *pl330)
 {
-       struct dma_pl330_desc *descdone, *tmp;
+       struct dma_pl330_desc *descdone;
        unsigned long flags;
        void __iomem *regs;
        u32 val;
@@ -1588,7 +1588,9 @@ static int pl330_update(struct pl330_dmac *pl330)
        }
 
        /* Now that we are in no hurry, do the callbacks */
-       list_for_each_entry_safe(descdone, tmp, &pl330->req_done, rqd) {
+       while (!list_empty(&pl330->req_done)) {
+               descdone = list_first_entry(&pl330->req_done,
+                                           struct dma_pl330_desc, rqd);
                list_del(&descdone->rqd);
                spin_unlock_irqrestore(&pl330->lock, flags);
                dma_pl330_rqcb(descdone, PL330_ERR_NONE);
index d076940e0c69ada23dc451367df9d71c88649dc7..d29275b97e8453a1fd7731fe4564bf0a61b9460c 100644 (file)
@@ -393,6 +393,7 @@ struct bam_device {
        struct device_dma_parameters dma_parms;
        struct bam_chan *channels;
        u32 num_channels;
+       u32 num_ees;
 
        /* execution environment ID, from DT */
        u32 ee;
@@ -934,12 +935,15 @@ static void bam_apply_new_config(struct bam_chan *bchan,
        struct bam_device *bdev = bchan->bdev;
        u32 maxburst;
 
-       if (dir == DMA_DEV_TO_MEM)
-               maxburst = bchan->slave.src_maxburst;
-       else
-               maxburst = bchan->slave.dst_maxburst;
+       if (!bdev->controlled_remotely) {
+               if (dir == DMA_DEV_TO_MEM)
+                       maxburst = bchan->slave.src_maxburst;
+               else
+                       maxburst = bchan->slave.dst_maxburst;
 
-       writel_relaxed(maxburst, bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD));
+               writel_relaxed(maxburst,
+                              bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD));
+       }
 
        bchan->reconfigure = 0;
 }
@@ -1128,15 +1132,19 @@ static int bam_init(struct bam_device *bdev)
        u32 val;
 
        /* read revision and configuration information */
-       val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION)) >> NUM_EES_SHIFT;
-       val &= NUM_EES_MASK;
+       if (!bdev->num_ees) {
+               val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION));
+               bdev->num_ees = (val >> NUM_EES_SHIFT) & NUM_EES_MASK;
+       }
 
        /* check that configured EE is within range */
-       if (bdev->ee >= val)
+       if (bdev->ee >= bdev->num_ees)
                return -EINVAL;
 
-       val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES));
-       bdev->num_channels = val & BAM_NUM_PIPES_MASK;
+       if (!bdev->num_channels) {
+               val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES));
+               bdev->num_channels = val & BAM_NUM_PIPES_MASK;
+       }
 
        if (bdev->controlled_remotely)
                return 0;
@@ -1232,9 +1240,25 @@ static int bam_dma_probe(struct platform_device *pdev)
        bdev->controlled_remotely = of_property_read_bool(pdev->dev.of_node,
                                                "qcom,controlled-remotely");
 
+       if (bdev->controlled_remotely) {
+               ret = of_property_read_u32(pdev->dev.of_node, "num-channels",
+                                          &bdev->num_channels);
+               if (ret)
+                       dev_err(bdev->dev, "num-channels unspecified in dt\n");
+
+               ret = of_property_read_u32(pdev->dev.of_node, "qcom,num-ees",
+                                          &bdev->num_ees);
+               if (ret)
+                       dev_err(bdev->dev, "num-ees unspecified in dt\n");
+       }
+
        bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk");
-       if (IS_ERR(bdev->bamclk))
-               return PTR_ERR(bdev->bamclk);
+       if (IS_ERR(bdev->bamclk)) {
+               if (!bdev->controlled_remotely)
+                       return PTR_ERR(bdev->bamclk);
+
+               bdev->bamclk = NULL;
+       }
 
        ret = clk_prepare_enable(bdev->bamclk);
        if (ret) {
@@ -1309,6 +1333,11 @@ static int bam_dma_probe(struct platform_device *pdev)
        if (ret)
                goto err_unregister_dma;
 
+       if (bdev->controlled_remotely) {
+               pm_runtime_disable(&pdev->dev);
+               return 0;
+       }
+
        pm_runtime_irq_safe(&pdev->dev);
        pm_runtime_set_autosuspend_delay(&pdev->dev, BAM_DMA_AUTOSUSPEND_DELAY);
        pm_runtime_use_autosuspend(&pdev->dev);
@@ -1392,7 +1421,8 @@ static int __maybe_unused bam_dma_suspend(struct device *dev)
 {
        struct bam_device *bdev = dev_get_drvdata(dev);
 
-       pm_runtime_force_suspend(dev);
+       if (!bdev->controlled_remotely)
+               pm_runtime_force_suspend(dev);
 
        clk_unprepare(bdev->bamclk);
 
@@ -1408,7 +1438,8 @@ static int __maybe_unused bam_dma_resume(struct device *dev)
        if (ret)
                return ret;
 
-       pm_runtime_force_resume(dev);
+       if (!bdev->controlled_remotely)
+               pm_runtime_force_resume(dev);
 
        return 0;
 }
index d0cacdb0713eca47360e4f5ceedc8dc6428145bb..2a2ccd9c78e4cc1f8e7b7a72b22613530a0c1afe 100644 (file)
@@ -1301,8 +1301,17 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
         * If the cookie doesn't correspond to the currently running transfer
         * then the descriptor hasn't been processed yet, and the residue is
         * equal to the full descriptor size.
+        * Also, a client driver may call this function before
+        * rcar_dmac_isr_channel_thread() has run. In that case, "desc.running"
+        * will point to the next descriptor and completed descriptors will be
+        * on the done list. So, if the argument cookie matches a descriptor on
+        * the done list, we can assume its residue is zero.
         */
        if (cookie != desc->async_tx.cookie) {
+               list_for_each_entry(desc, &chan->desc.done, node) {
+                       if (cookie == desc->async_tx.cookie)
+                               return 0;
+               }
                list_for_each_entry(desc, &chan->desc.pending, node) {
                        if (cookie == desc->async_tx.cookie)
                                return desc->size;
@@ -1677,8 +1686,8 @@ static const struct dev_pm_ops rcar_dmac_pm = {
         *   - Wait for the current transfer to complete and stop the device,
         *   - Resume transfers, if any.
         */
-       SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
-                                    pm_runtime_force_resume)
+       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+                                     pm_runtime_force_resume)
        SET_RUNTIME_PM_OPS(rcar_dmac_runtime_suspend, rcar_dmac_runtime_resume,
                           NULL)
 };
index 786fc8fcc38ed6a2f2442a18a36bc4ab6ad4c273..8c5807362a257422ea89c805f92dc411bf8ded8e 100644 (file)
@@ -5,6 +5,7 @@
  *
  * Copyright (C) M'boumba Cedric Madianga 2015
  * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
+ *         Pierre-Yves Mordret <pierre-yves.mordret@st.com>
  *
  * License terms:  GNU General Public License (GPL), version 2
  */
 #define STM32_DMA_LIFCR                        0x0008 /* DMA Low Int Flag Clear Reg */
 #define STM32_DMA_HIFCR                        0x000c /* DMA High Int Flag Clear Reg */
 #define STM32_DMA_TCI                  BIT(5) /* Transfer Complete Interrupt */
+#define STM32_DMA_HTI                  BIT(4) /* Half Transfer Interrupt */
 #define STM32_DMA_TEI                  BIT(3) /* Transfer Error Interrupt */
 #define STM32_DMA_DMEI                 BIT(2) /* Direct Mode Error Interrupt */
 #define STM32_DMA_FEI                  BIT(0) /* FIFO Error Interrupt */
+#define STM32_DMA_MASKI                        (STM32_DMA_TCI \
+                                        | STM32_DMA_TEI \
+                                        | STM32_DMA_DMEI \
+                                        | STM32_DMA_FEI)
 
 /* DMA Stream x Configuration Register */
 #define STM32_DMA_SCR(x)               (0x0010 + 0x18 * (x)) /* x = 0..7 */
@@ -60,7 +66,8 @@
 #define STM32_DMA_SCR_PINC             BIT(9) /* Peripheral increment mode */
 #define STM32_DMA_SCR_CIRC             BIT(8) /* Circular mode */
 #define STM32_DMA_SCR_PFCTRL           BIT(5) /* Peripheral Flow Controller */
-#define STM32_DMA_SCR_TCIE             BIT(4) /* Transfer Cplete Int Enable*/
+#define STM32_DMA_SCR_TCIE             BIT(4) /* Transfer Complete Int Enable
+                                               */
 #define STM32_DMA_SCR_TEIE             BIT(2) /* Transfer Error Int Enable */
 #define STM32_DMA_SCR_DMEIE            BIT(1) /* Direct Mode Err Int Enable */
 #define STM32_DMA_SCR_EN               BIT(0) /* Stream Enable */
 #define STM32_DMA_FIFO_THRESHOLD_FULL                  0x03
 
 #define STM32_DMA_MAX_DATA_ITEMS       0xffff
+/*
+ * A valid transfer count ranges from 0 to 0xFFFE; odd maximum counts can lead
+ * to unaligned scatter-gather boundaries. Thus it is safer to round this
+ * value down to the FIFO size (16 bytes).
+ */
+#define STM32_DMA_ALIGNED_MAX_DATA_ITEMS       \
+       ALIGN_DOWN(STM32_DMA_MAX_DATA_ITEMS, 16)
 #define STM32_DMA_MAX_CHANNELS         0x08
 #define STM32_DMA_MAX_REQUEST_ID       0x08
 #define STM32_DMA_MAX_DATA_PARAM       0x03
+#define STM32_DMA_FIFO_SIZE            16      /* FIFO is 16 bytes */
+#define STM32_DMA_MIN_BURST            4
 #define STM32_DMA_MAX_BURST            16
 
+/* DMA Features */
+#define STM32_DMA_THRESHOLD_FTR_MASK   GENMASK(1, 0)
+#define STM32_DMA_THRESHOLD_FTR_GET(n) ((n) & STM32_DMA_THRESHOLD_FTR_MASK)
+
 enum stm32_dma_width {
        STM32_DMA_BYTE,
        STM32_DMA_HALF_WORD,
@@ -129,11 +149,18 @@ enum stm32_dma_burst_size {
        STM32_DMA_BURST_INCR16,
 };
 
+/**
+ * struct stm32_dma_cfg - STM32 DMA custom configuration
+ * @channel_id: channel ID
+ * @request_line: DMA request
+ * @stream_config: 32bit mask specifying the DMA channel configuration
+ * @features: 32bit mask specifying the DMA Feature list
+ */
 struct stm32_dma_cfg {
        u32 channel_id;
        u32 request_line;
        u32 stream_config;
-       u32 threshold;
+       u32 features;
 };
 
 struct stm32_dma_chan_reg {
@@ -171,6 +198,9 @@ struct stm32_dma_chan {
        u32 next_sg;
        struct dma_slave_config dma_sconfig;
        struct stm32_dma_chan_reg chan_reg;
+       u32 threshold;
+       u32 mem_burst;
+       u32 mem_width;
 };
 
 struct stm32_dma_device {
@@ -235,6 +265,85 @@ static int stm32_dma_get_width(struct stm32_dma_chan *chan,
        }
 }
 
+static enum dma_slave_buswidth stm32_dma_get_max_width(u32 buf_len,
+                                                      u32 threshold)
+{
+       enum dma_slave_buswidth max_width;
+
+       if (threshold == STM32_DMA_FIFO_THRESHOLD_FULL)
+               max_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+       else
+               max_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
+
+       while ((buf_len < max_width  || buf_len % max_width) &&
+              max_width > DMA_SLAVE_BUSWIDTH_1_BYTE)
+               max_width = max_width >> 1;
+
+       return max_width;
+}
+
+static bool stm32_dma_fifo_threshold_is_allowed(u32 burst, u32 threshold,
+                                               enum dma_slave_buswidth width)
+{
+       u32 remaining;
+
+       if (width != DMA_SLAVE_BUSWIDTH_UNDEFINED) {
+               if (burst != 0) {
+                       /*
+                        * If number of beats fit in several whole bursts
+                        * this configuration is allowed.
+                        */
+                       remaining = ((STM32_DMA_FIFO_SIZE / width) *
+                                    (threshold + 1) / 4) % burst;
+
+                       if (remaining == 0)
+                               return true;
+               } else {
+                       return true;
+               }
+       }
+
+       return false;
+}
+
+static bool stm32_dma_is_burst_possible(u32 buf_len, u32 threshold)
+{
+       switch (threshold) {
+       case STM32_DMA_FIFO_THRESHOLD_FULL:
+               if (buf_len >= STM32_DMA_MAX_BURST)
+                       return true;
+               else
+                       return false;
+       case STM32_DMA_FIFO_THRESHOLD_HALFFULL:
+               if (buf_len >= STM32_DMA_MAX_BURST / 2)
+                       return true;
+               else
+                       return false;
+       default:
+               return false;
+       }
+}
+
+static u32 stm32_dma_get_best_burst(u32 buf_len, u32 max_burst, u32 threshold,
+                                   enum dma_slave_buswidth width)
+{
+       u32 best_burst = max_burst;
+
+       if (best_burst == 1 || !stm32_dma_is_burst_possible(buf_len, threshold))
+               return 0;
+
+       while ((buf_len < best_burst * width && best_burst > 1) ||
+              !stm32_dma_fifo_threshold_is_allowed(best_burst, threshold,
+                                                   width)) {
+               if (best_burst > STM32_DMA_MIN_BURST)
+                       best_burst = best_burst >> 1;
+               else
+                       best_burst = 0;
+       }
+
+       return best_burst;
+}
+
 static int stm32_dma_get_burst(struct stm32_dma_chan *chan, u32 maxburst)
 {
        switch (maxburst) {
@@ -254,12 +363,12 @@ static int stm32_dma_get_burst(struct stm32_dma_chan *chan, u32 maxburst)
 }
 
 static void stm32_dma_set_fifo_config(struct stm32_dma_chan *chan,
-                                     u32 src_maxburst, u32 dst_maxburst)
+                                     u32 src_burst, u32 dst_burst)
 {
        chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_MASK;
        chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_DMEIE;
 
-       if ((!src_maxburst) && (!dst_maxburst)) {
+       if (!src_burst && !dst_burst) {
                /* Using direct mode */
                chan->chan_reg.dma_scr |= STM32_DMA_SCR_DMEIE;
        } else {
@@ -300,7 +409,7 @@ static u32 stm32_dma_irq_status(struct stm32_dma_chan *chan)
 
        flags = dma_isr >> (((chan->id & 2) << 3) | ((chan->id & 1) * 6));
 
-       return flags;
+       return flags & STM32_DMA_MASKI;
 }
 
 static void stm32_dma_irq_clear(struct stm32_dma_chan *chan, u32 flags)
@@ -315,6 +424,7 @@ static void stm32_dma_irq_clear(struct stm32_dma_chan *chan, u32 flags)
         * If (ch % 4) is 2 or 3, left shift the mask by 16 bits.
         * If (ch % 4) is 1 or 3, additionally left shift the mask by 6 bits.
         */
+       flags &= STM32_DMA_MASKI;
        dma_ifcr = flags << (((chan->id & 2) << 3) | ((chan->id & 1) * 6));
 
        if (chan->id & 4)
@@ -429,6 +539,8 @@ static void stm32_dma_dump_reg(struct stm32_dma_chan *chan)
        dev_dbg(chan2dev(chan), "SFCR:  0x%08x\n", sfcr);
 }
 
+static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan);
+
 static void stm32_dma_start_transfer(struct stm32_dma_chan *chan)
 {
        struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
@@ -471,6 +583,9 @@ static void stm32_dma_start_transfer(struct stm32_dma_chan *chan)
        if (status)
                stm32_dma_irq_clear(chan, status);
 
+       if (chan->desc->cyclic)
+               stm32_dma_configure_next_sg(chan);
+
        stm32_dma_dump_reg(chan);
 
        /* Start DMA */
@@ -541,13 +656,29 @@ static irqreturn_t stm32_dma_chan_irq(int irq, void *devid)
        status = stm32_dma_irq_status(chan);
        scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
 
-       if ((status & STM32_DMA_TCI) && (scr & STM32_DMA_SCR_TCIE)) {
+       if (status & STM32_DMA_TCI) {
                stm32_dma_irq_clear(chan, STM32_DMA_TCI);
-               stm32_dma_handle_chan_done(chan);
-
-       } else {
+               if (scr & STM32_DMA_SCR_TCIE)
+                       stm32_dma_handle_chan_done(chan);
+               status &= ~STM32_DMA_TCI;
+       }
+       if (status & STM32_DMA_HTI) {
+               stm32_dma_irq_clear(chan, STM32_DMA_HTI);
+               status &= ~STM32_DMA_HTI;
+       }
+       if (status & STM32_DMA_FEI) {
+               stm32_dma_irq_clear(chan, STM32_DMA_FEI);
+               status &= ~STM32_DMA_FEI;
+               if (!(scr & STM32_DMA_SCR_EN))
+                       dev_err(chan2dev(chan), "FIFO Error\n");
+               else
+                       dev_dbg(chan2dev(chan), "FIFO over/underrun\n");
+       }
+       if (status) {
                stm32_dma_irq_clear(chan, status);
                dev_err(chan2dev(chan), "DMA error: status=0x%08x\n", status);
+               if (!(scr & STM32_DMA_SCR_EN))
+                       dev_err(chan2dev(chan), "chan disabled by HW\n");
        }
 
        spin_unlock(&chan->vchan.lock);
@@ -564,45 +695,59 @@ static void stm32_dma_issue_pending(struct dma_chan *c)
        if (vchan_issue_pending(&chan->vchan) && !chan->desc && !chan->busy) {
                dev_dbg(chan2dev(chan), "vchan %p: issued\n", &chan->vchan);
                stm32_dma_start_transfer(chan);
-               if (chan->desc->cyclic)
-                       stm32_dma_configure_next_sg(chan);
+
        }
        spin_unlock_irqrestore(&chan->vchan.lock, flags);
 }
 
 static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan,
                                    enum dma_transfer_direction direction,
-                                   enum dma_slave_buswidth *buswidth)
+                                   enum dma_slave_buswidth *buswidth,
+                                   u32 buf_len)
 {
        enum dma_slave_buswidth src_addr_width, dst_addr_width;
        int src_bus_width, dst_bus_width;
        int src_burst_size, dst_burst_size;
-       u32 src_maxburst, dst_maxburst;
-       u32 dma_scr = 0;
+       u32 src_maxburst, dst_maxburst, src_best_burst, dst_best_burst;
+       u32 dma_scr, threshold;
 
        src_addr_width = chan->dma_sconfig.src_addr_width;
        dst_addr_width = chan->dma_sconfig.dst_addr_width;
        src_maxburst = chan->dma_sconfig.src_maxburst;
        dst_maxburst = chan->dma_sconfig.dst_maxburst;
+       threshold = chan->threshold;
 
        switch (direction) {
        case DMA_MEM_TO_DEV:
+               /* Set device data size */
                dst_bus_width = stm32_dma_get_width(chan, dst_addr_width);
                if (dst_bus_width < 0)
                        return dst_bus_width;
 
-               dst_burst_size = stm32_dma_get_burst(chan, dst_maxburst);
+               /* Set device burst size */
+               dst_best_burst = stm32_dma_get_best_burst(buf_len,
+                                                         dst_maxburst,
+                                                         threshold,
+                                                         dst_addr_width);
+
+               dst_burst_size = stm32_dma_get_burst(chan, dst_best_burst);
                if (dst_burst_size < 0)
                        return dst_burst_size;
 
-               if (!src_addr_width)
-                       src_addr_width = dst_addr_width;
-
+               /* Set memory data size */
+               src_addr_width = stm32_dma_get_max_width(buf_len, threshold);
+               chan->mem_width = src_addr_width;
                src_bus_width = stm32_dma_get_width(chan, src_addr_width);
                if (src_bus_width < 0)
                        return src_bus_width;
 
-               src_burst_size = stm32_dma_get_burst(chan, src_maxburst);
+               /* Set memory burst size */
+               src_maxburst = STM32_DMA_MAX_BURST;
+               src_best_burst = stm32_dma_get_best_burst(buf_len,
+                                                         src_maxburst,
+                                                         threshold,
+                                                         src_addr_width);
+               src_burst_size = stm32_dma_get_burst(chan, src_best_burst);
                if (src_burst_size < 0)
                        return src_burst_size;
 
@@ -612,27 +757,46 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan,
                        STM32_DMA_SCR_PBURST(dst_burst_size) |
                        STM32_DMA_SCR_MBURST(src_burst_size);
 
+               /* Set FIFO threshold */
+               chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_FTH_MASK;
+               chan->chan_reg.dma_sfcr |= STM32_DMA_SFCR_FTH(threshold);
+
+               /* Set peripheral address */
                chan->chan_reg.dma_spar = chan->dma_sconfig.dst_addr;
                *buswidth = dst_addr_width;
                break;
 
        case DMA_DEV_TO_MEM:
+               /* Set device data size */
                src_bus_width = stm32_dma_get_width(chan, src_addr_width);
                if (src_bus_width < 0)
                        return src_bus_width;
 
-               src_burst_size = stm32_dma_get_burst(chan, src_maxburst);
+               /* Set device burst size */
+               src_best_burst = stm32_dma_get_best_burst(buf_len,
+                                                         src_maxburst,
+                                                         threshold,
+                                                         src_addr_width);
+               chan->mem_burst = src_best_burst;
+               src_burst_size = stm32_dma_get_burst(chan, src_best_burst);
                if (src_burst_size < 0)
                        return src_burst_size;
 
-               if (!dst_addr_width)
-                       dst_addr_width = src_addr_width;
-
+               /* Set memory data size */
+               dst_addr_width = stm32_dma_get_max_width(buf_len, threshold);
+               chan->mem_width = dst_addr_width;
                dst_bus_width = stm32_dma_get_width(chan, dst_addr_width);
                if (dst_bus_width < 0)
                        return dst_bus_width;
 
-               dst_burst_size = stm32_dma_get_burst(chan, dst_maxburst);
+               /* Set memory burst size */
+               dst_maxburst = STM32_DMA_MAX_BURST;
+               dst_best_burst = stm32_dma_get_best_burst(buf_len,
+                                                         dst_maxburst,
+                                                         threshold,
+                                                         dst_addr_width);
+               chan->mem_burst = dst_best_burst;
+               dst_burst_size = stm32_dma_get_burst(chan, dst_best_burst);
                if (dst_burst_size < 0)
                        return dst_burst_size;
 
@@ -642,6 +806,11 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan,
                        STM32_DMA_SCR_PBURST(src_burst_size) |
                        STM32_DMA_SCR_MBURST(dst_burst_size);
 
+               /* Set FIFO threshold */
+               chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_FTH_MASK;
+               chan->chan_reg.dma_sfcr |= STM32_DMA_SFCR_FTH(threshold);
+
+               /* Set peripheral address */
                chan->chan_reg.dma_spar = chan->dma_sconfig.src_addr;
                *buswidth = chan->dma_sconfig.src_addr_width;
                break;
@@ -651,8 +820,9 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan,
                return -EINVAL;
        }
 
-       stm32_dma_set_fifo_config(chan, src_maxburst, dst_maxburst);
+       stm32_dma_set_fifo_config(chan, src_best_burst, dst_best_burst);
 
+       /* Set DMA control register */
        chan->chan_reg.dma_scr &= ~(STM32_DMA_SCR_DIR_MASK |
                        STM32_DMA_SCR_PSIZE_MASK | STM32_DMA_SCR_MSIZE_MASK |
                        STM32_DMA_SCR_PBURST_MASK | STM32_DMA_SCR_MBURST_MASK);
@@ -692,10 +862,6 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg(
        if (!desc)
                return NULL;
 
-       ret = stm32_dma_set_xfer_param(chan, direction, &buswidth);
-       if (ret < 0)
-               goto err;
-
        /* Set peripheral flow controller */
        if (chan->dma_sconfig.device_fc)
                chan->chan_reg.dma_scr |= STM32_DMA_SCR_PFCTRL;
@@ -703,10 +869,15 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg(
                chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_PFCTRL;
 
        for_each_sg(sgl, sg, sg_len, i) {
+               ret = stm32_dma_set_xfer_param(chan, direction, &buswidth,
+                                              sg_dma_len(sg));
+               if (ret < 0)
+                       goto err;
+
                desc->sg_req[i].len = sg_dma_len(sg);
 
                nb_data_items = desc->sg_req[i].len / buswidth;
-               if (nb_data_items > STM32_DMA_MAX_DATA_ITEMS) {
+               if (nb_data_items > STM32_DMA_ALIGNED_MAX_DATA_ITEMS) {
                        dev_err(chan2dev(chan), "nb items not supported\n");
                        goto err;
                }
@@ -767,12 +938,12 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_cyclic(
                return NULL;
        }
 
-       ret = stm32_dma_set_xfer_param(chan, direction, &buswidth);
+       ret = stm32_dma_set_xfer_param(chan, direction, &buswidth, period_len);
        if (ret < 0)
                return NULL;
 
        nb_data_items = period_len / buswidth;
-       if (nb_data_items > STM32_DMA_MAX_DATA_ITEMS) {
+       if (nb_data_items > STM32_DMA_ALIGNED_MAX_DATA_ITEMS) {
                dev_err(chan2dev(chan), "number of items not supported\n");
                return NULL;
        }
@@ -816,35 +987,45 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy(
        dma_addr_t src, size_t len, unsigned long flags)
 {
        struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
-       u32 num_sgs;
+       enum dma_slave_buswidth max_width;
        struct stm32_dma_desc *desc;
        size_t xfer_count, offset;
+       u32 num_sgs, best_burst, dma_burst, threshold;
        int i;
 
-       num_sgs = DIV_ROUND_UP(len, STM32_DMA_MAX_DATA_ITEMS);
+       num_sgs = DIV_ROUND_UP(len, STM32_DMA_ALIGNED_MAX_DATA_ITEMS);
        desc = stm32_dma_alloc_desc(num_sgs);
        if (!desc)
                return NULL;
 
+       threshold = chan->threshold;
+
        for (offset = 0, i = 0; offset < len; offset += xfer_count, i++) {
                xfer_count = min_t(size_t, len - offset,
-                                  STM32_DMA_MAX_DATA_ITEMS);
+                                  STM32_DMA_ALIGNED_MAX_DATA_ITEMS);
 
-               desc->sg_req[i].len = xfer_count;
+               /* Compute best burst size */
+               max_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+               best_burst = stm32_dma_get_best_burst(len, STM32_DMA_MAX_BURST,
+                                                     threshold, max_width);
+               dma_burst = stm32_dma_get_burst(chan, best_burst);
 
                stm32_dma_clear_reg(&desc->sg_req[i].chan_reg);
                desc->sg_req[i].chan_reg.dma_scr =
                        STM32_DMA_SCR_DIR(STM32_DMA_MEM_TO_MEM) |
+                       STM32_DMA_SCR_PBURST(dma_burst) |
+                       STM32_DMA_SCR_MBURST(dma_burst) |
                        STM32_DMA_SCR_MINC |
                        STM32_DMA_SCR_PINC |
                        STM32_DMA_SCR_TCIE |
                        STM32_DMA_SCR_TEIE;
-               desc->sg_req[i].chan_reg.dma_sfcr = STM32_DMA_SFCR_DMDIS |
-                       STM32_DMA_SFCR_FTH(STM32_DMA_FIFO_THRESHOLD_FULL) |
-                       STM32_DMA_SFCR_FEIE;
+               desc->sg_req[i].chan_reg.dma_sfcr |= STM32_DMA_SFCR_MASK;
+               desc->sg_req[i].chan_reg.dma_sfcr |=
+                       STM32_DMA_SFCR_FTH(threshold);
                desc->sg_req[i].chan_reg.dma_spar = src + offset;
                desc->sg_req[i].chan_reg.dma_sm0ar = dest + offset;
                desc->sg_req[i].chan_reg.dma_sndtr = xfer_count;
+               desc->sg_req[i].len = xfer_count;
        }
 
        desc->num_sgs = num_sgs;
@@ -869,6 +1050,7 @@ static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan,
                                     struct stm32_dma_desc *desc,
                                     u32 next_sg)
 {
+       u32 modulo, burst_size;
        u32 residue = 0;
        int i;
 
@@ -876,8 +1058,10 @@ static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan,
         * In cyclic mode, for the last period, residue = remaining bytes from
         * NDTR
         */
-       if (chan->desc->cyclic && next_sg == 0)
-               return stm32_dma_get_remaining_bytes(chan);
+       if (chan->desc->cyclic && next_sg == 0) {
+               residue = stm32_dma_get_remaining_bytes(chan);
+               goto end;
+       }
 
        /*
         * For all other periods in cyclic mode, and in sg mode,
@@ -888,6 +1072,15 @@ static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan,
                residue += desc->sg_req[i].len;
        residue += stm32_dma_get_remaining_bytes(chan);
 
+end:
+       if (!chan->mem_burst)
+               return residue;
+
+       burst_size = chan->mem_burst * chan->mem_width;
+       modulo = residue % burst_size;
+       if (modulo)
+               residue = residue - modulo + burst_size;
+
        return residue;
 }
 
@@ -902,7 +1095,7 @@ static enum dma_status stm32_dma_tx_status(struct dma_chan *c,
        u32 residue = 0;
 
        status = dma_cookie_status(c, cookie, state);
-       if ((status == DMA_COMPLETE) || (!state))
+       if (status == DMA_COMPLETE || !state)
                return status;
 
        spin_lock_irqsave(&chan->vchan.lock, flags);
@@ -966,7 +1159,7 @@ static void stm32_dma_desc_free(struct virt_dma_desc *vdesc)
 }
 
 static void stm32_dma_set_config(struct stm32_dma_chan *chan,
-                         struct stm32_dma_cfg *cfg)
+                                struct stm32_dma_cfg *cfg)
 {
        stm32_dma_clear_reg(&chan->chan_reg);
 
@@ -976,7 +1169,7 @@ static void stm32_dma_set_config(struct stm32_dma_chan *chan,
        /* Enable Interrupts  */
        chan->chan_reg.dma_scr |= STM32_DMA_SCR_TEIE | STM32_DMA_SCR_TCIE;
 
-       chan->chan_reg.dma_sfcr = cfg->threshold & STM32_DMA_SFCR_FTH_MASK;
+       chan->threshold = STM32_DMA_THRESHOLD_FTR_GET(cfg->features);
 }
 
 static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec,
@@ -996,10 +1189,10 @@ static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec,
        cfg.channel_id = dma_spec->args[0];
        cfg.request_line = dma_spec->args[1];
        cfg.stream_config = dma_spec->args[2];
-       cfg.threshold = dma_spec->args[3];
+       cfg.features = dma_spec->args[3];
 
-       if ((cfg.channel_id >= STM32_DMA_MAX_CHANNELS) ||
-           (cfg.request_line >= STM32_DMA_MAX_REQUEST_ID)) {
+       if (cfg.channel_id >= STM32_DMA_MAX_CHANNELS ||
+           cfg.request_line >= STM32_DMA_MAX_REQUEST_ID) {
                dev_err(dev, "Bad channel and/or request id\n");
                return NULL;
        }
index 3c7e5b741e37d2d10a2b0ed528bcce6e09183d96..f77cdb3a041f24d7b9af6fc755b1f732310bd564 100644 (file)
@@ -13,6 +13,7 @@ config BCM47XX_NVRAM
 config BCM47XX_SPROM
        bool "Broadcom SPROM driver"
        depends on BCM47XX_NVRAM
+       select GENERIC_NET_UTILS
        help
          Broadcom devices store configuration data in SPROM. Accessing it is
          specific to the bus host type, e.g. PCI(e) devices have it mapped in
index 62aa3cf09b4d07dcf1c56a0515cbe01150a15668..4787f86c8ac127913b0cdf8870af5eeda2d47d91 100644 (file)
@@ -137,20 +137,6 @@ static void nvram_read_leddc(const char *prefix, const char *name,
        *leddc_off_time = (val >> 16) & 0xff;
 }
 
-static void bcm47xx_nvram_parse_macaddr(char *buf, u8 macaddr[6])
-{
-       if (strchr(buf, ':'))
-               sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &macaddr[0],
-                       &macaddr[1], &macaddr[2], &macaddr[3], &macaddr[4],
-                       &macaddr[5]);
-       else if (strchr(buf, '-'))
-               sscanf(buf, "%hhx-%hhx-%hhx-%hhx-%hhx-%hhx", &macaddr[0],
-                       &macaddr[1], &macaddr[2], &macaddr[3], &macaddr[4],
-                       &macaddr[5]);
-       else
-               pr_warn("Can not parse mac address: %s\n", buf);
-}
-
 static void nvram_read_macaddr(const char *prefix, const char *name,
                               u8 val[6], bool fallback)
 {
@@ -161,7 +147,9 @@ static void nvram_read_macaddr(const char *prefix, const char *name,
        if (err < 0)
                return;
 
-       bcm47xx_nvram_parse_macaddr(buf, val);
+       strreplace(buf, '-', ':');
+       if (!mac_pton(buf, val))
+               pr_warn("Can not parse mac address: %s\n", buf);
 }
 
 static void nvram_read_alpha2(const char *prefix, const char *name,
index a4f68affc13bc4ef13ab0f7790f306cd5be21625..d39400e5bc42907866b4cf7d734ee7f6118c5399 100644 (file)
@@ -89,14 +89,14 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
         */
        if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
                ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
-                               mem_region, mem_phys, mem_size);
+                               mem_region, mem_phys, mem_size, NULL);
        } else {
                char newname[strlen("qcom/") + strlen(fwname) + 1];
 
                sprintf(newname, "qcom/%s", fwname);
 
                ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
-                               mem_region, mem_phys, mem_size);
+                               mem_region, mem_phys, mem_size, NULL);
        }
        if (ret)
                goto out;
index 033e57366d56bffd64e3130b60c5903e0ce1ac89..f249a442845804d8f22cdab29ec7e4a708874566 100644 (file)
@@ -1231,8 +1231,9 @@ config SENSORS_NCT6775
        help
          If you say yes here you get support for the hardware monitoring
          functionality of the Nuvoton NCT6106D, NCT6775F, NCT6776F, NCT6779D,
-         NCT6791D, NCT6792D, NCT6793D, and compatible Super-I/O chips. This
-         driver replaces the w83627ehf driver for NCT6775F and NCT6776F.
+         NCT6791D, NCT6792D, NCT6793D, NCT6795D, NCT6796D, and compatible
+         Super-I/O chips. This driver replaces the w83627ehf driver for
+         NCT6775F and NCT6776F.
 
          This driver can also be built as a module.  If so, the module
          will be called nct6775.
index 6d1208b2b6d2e1bfff864a7e5facd0ae801af1ac..6c83c385a7ca88206e998ea6ef3500e749f4c247 100644 (file)
@@ -128,7 +128,6 @@ enum g762_regs {
                         G762_REG_FAN_CMD2_GEAR_MODE_1)) >> 2))
 
 struct g762_data {
-       struct device *hwmon_dev;
        struct i2c_client *client;
        struct clk *clk;
 
@@ -594,6 +593,14 @@ MODULE_DEVICE_TABLE(of, g762_dt_match);
  * call to g762_of_clock_disable(). Note that a reference to clock is kept
  * in our private data structure to be used in this function.
  */
+static void g762_of_clock_disable(void *data)
+{
+       struct g762_data *g762 = data;
+
+       clk_disable_unprepare(g762->clk);
+       clk_put(g762->clk);
+}
+
 static int g762_of_clock_enable(struct i2c_client *client)
 {
        struct g762_data *data;
@@ -626,6 +633,7 @@ static int g762_of_clock_enable(struct i2c_client *client)
        data = i2c_get_clientdata(client);
        data->clk = clk;
 
+       devm_add_action(&client->dev, g762_of_clock_disable, data);
        return 0;
 
  clk_unprep:
@@ -637,17 +645,6 @@ static int g762_of_clock_enable(struct i2c_client *client)
        return ret;
 }
 
-static void g762_of_clock_disable(struct i2c_client *client)
-{
-       struct g762_data *data = i2c_get_clientdata(client);
-
-       if (!data->clk)
-               return;
-
-       clk_disable_unprepare(data->clk);
-       clk_put(data->clk);
-}
-
 static int g762_of_prop_import_one(struct i2c_client *client,
                                   const char *pname,
                                   int (*psetter)(struct device *dev,
@@ -698,8 +695,6 @@ static int g762_of_clock_enable(struct i2c_client *client)
 {
        return 0;
 }
-
-static void g762_of_clock_disable(struct i2c_client *client) { }
 #endif
 
 /*
@@ -1054,6 +1049,7 @@ static inline int g762_fan_init(struct device *dev)
 static int g762_probe(struct i2c_client *client, const struct i2c_device_id *id)
 {
        struct device *dev = &client->dev;
+       struct device *hwmon_dev;
        struct g762_data *data;
        int ret;
 
@@ -1080,35 +1076,15 @@ static int g762_probe(struct i2c_client *client, const struct i2c_device_id *id)
                return ret;
        ret = g762_of_prop_import(client);
        if (ret)
-               goto clock_dis;
+               return ret;
        /* ... or platform_data */
        ret = g762_pdata_prop_import(client);
        if (ret)
-               goto clock_dis;
+               return ret;
 
-       data->hwmon_dev = hwmon_device_register_with_groups(dev, client->name,
+       hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
                                                            data, g762_groups);
-       if (IS_ERR(data->hwmon_dev)) {
-               ret = PTR_ERR(data->hwmon_dev);
-               goto clock_dis;
-       }
-
-       return 0;
-
- clock_dis:
-       g762_of_clock_disable(client);
-
-       return ret;
-}
-
-static int g762_remove(struct i2c_client *client)
-{
-       struct g762_data *data = i2c_get_clientdata(client);
-
-       hwmon_device_unregister(data->hwmon_dev);
-       g762_of_clock_disable(client);
-
-       return 0;
+       return PTR_ERR_OR_ZERO(hwmon_dev);
 }
 
 static struct i2c_driver g762_driver = {
@@ -1117,7 +1093,6 @@ static struct i2c_driver g762_driver = {
                .of_match_table = of_match_ptr(g762_dt_match),
        },
        .probe    = g762_probe,
-       .remove   = g762_remove,
        .id_table = g762_id,
 };
 
index 2a91974a10bbd30c36906e3591e0bb30e363aa35..d40fe5122e941c345fd0656aea597fbec2daaf81 100644 (file)
@@ -52,6 +52,7 @@
  */
 static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b,
                                                I2C_CLIENT_END };
+enum chips { lm92, max6635 };
 
 /* The LM92 registers */
 #define LM92_REG_CONFIG                        0x01 /* 8-bit, RW */
@@ -259,62 +260,6 @@ static void lm92_init_client(struct i2c_client *client)
                                          config & 0xFE);
 }
 
-/*
- * The MAX6635 has no identification register, so we have to use tricks
- * to identify it reliably. This is somewhat slow.
- * Note that we do NOT rely on the 2 MSB of the configuration register
- * always reading 0, as suggested by the datasheet, because it was once
- * reported not to be true.
- */
-static int max6635_check(struct i2c_client *client)
-{
-       u16 temp_low, temp_high, temp_hyst, temp_crit;
-       u8 conf;
-       int i;
-
-       /*
-        * No manufacturer ID register, so a read from this address will
-        * always return the last read value.
-        */
-       temp_low = i2c_smbus_read_word_data(client, LM92_REG_TEMP_LOW);
-       if (i2c_smbus_read_word_data(client, LM92_REG_MAN_ID) != temp_low)
-               return 0;
-       temp_high = i2c_smbus_read_word_data(client, LM92_REG_TEMP_HIGH);
-       if (i2c_smbus_read_word_data(client, LM92_REG_MAN_ID) != temp_high)
-               return 0;
-
-       /* Limits are stored as integer values (signed, 9-bit). */
-       if ((temp_low & 0x7f00) || (temp_high & 0x7f00))
-               return 0;
-       temp_hyst = i2c_smbus_read_word_data(client, LM92_REG_TEMP_HYST);
-       temp_crit = i2c_smbus_read_word_data(client, LM92_REG_TEMP_CRIT);
-       if ((temp_hyst & 0x7f00) || (temp_crit & 0x7f00))
-               return 0;
-
-       /*
-        * Registers addresses were found to cycle over 16-byte boundaries.
-        * We don't test all registers with all offsets so as to save some
-        * reads and time, but this should still be sufficient to dismiss
-        * non-MAX6635 chips.
-        */
-       conf = i2c_smbus_read_byte_data(client, LM92_REG_CONFIG);
-       for (i = 16; i < 96; i *= 2) {
-               if (temp_hyst != i2c_smbus_read_word_data(client,
-                                LM92_REG_TEMP_HYST + i - 16)
-                || temp_crit != i2c_smbus_read_word_data(client,
-                                LM92_REG_TEMP_CRIT + i)
-                || temp_low != i2c_smbus_read_word_data(client,
-                               LM92_REG_TEMP_LOW + i + 16)
-                || temp_high != i2c_smbus_read_word_data(client,
-                                LM92_REG_TEMP_HIGH + i + 32)
-                || conf != i2c_smbus_read_byte_data(client,
-                           LM92_REG_CONFIG + i))
-                       return 0;
-       }
-
-       return 1;
-}
-
 static struct attribute *lm92_attrs[] = {
        &sensor_dev_attr_temp1_input.dev_attr.attr,
        &sensor_dev_attr_temp1_crit.dev_attr.attr,
@@ -348,8 +293,6 @@ static int lm92_detect(struct i2c_client *new_client,
 
        if ((config & 0xe0) == 0x00 && man_id == 0x0180)
                pr_info("lm92: Found National Semiconductor LM92 chip\n");
-       else if (max6635_check(new_client))
-               pr_info("lm92: Found Maxim MAX6635 chip\n");
        else
                return -ENODEV;
 
@@ -387,8 +330,8 @@ static int lm92_probe(struct i2c_client *new_client,
  */
 
 static const struct i2c_device_id lm92_id[] = {
-       { "lm92", 0 },
-       /* max6635 could be added here */
+       { "lm92", lm92 },
+       { "max6635", max6635 },
        { }
 };
 MODULE_DEVICE_TABLE(i2c, lm92_id);
index c219e43b8f026faa69e4617cecc2b31e7449a064..aebce560bfaf39bbe826ab8cf3a1e2c5fb5895ca 100644 (file)
@@ -41,7 +41,7 @@
  * nct6792d    15      6       6       2+6    0xc910 0xc1    0x5ca3
  * nct6793d    15      6       6       2+6    0xd120 0xc1    0x5ca3
  * nct6795d    14      6       6       2+6    0xd350 0xc1    0x5ca3
- *
+ * nct6796d    14      7       7       2+6    0xd420 0xc1    0x5ca3
  *
  * #temp lists the number of monitored temperature sources (first value) plus
  * the number of directly connectable temperature sensors (second value).
@@ -68,7 +68,7 @@
 #define USE_ALTERNATE
 
 enum kinds { nct6106, nct6775, nct6776, nct6779, nct6791, nct6792, nct6793,
-            nct6795 };
+            nct6795, nct6796 };
 
 /* used to set data->name = nct6775_device_names[data->sio_kind] */
 static const char * const nct6775_device_names[] = {
@@ -80,6 +80,7 @@ static const char * const nct6775_device_names[] = {
        "nct6792",
        "nct6793",
        "nct6795",
+       "nct6796",
 };
 
 static const char * const nct6775_sio_names[] __initconst = {
@@ -91,6 +92,7 @@ static const char * const nct6775_sio_names[] __initconst = {
        "NCT6792D",
        "NCT6793D",
        "NCT6795D",
+       "NCT6796D",
 };
 
 static unsigned short force_id;
@@ -125,6 +127,7 @@ MODULE_PARM_DESC(fan_debounce, "Enable debouncing for fan RPM signal");
 #define SIO_NCT6792_ID         0xc910
 #define SIO_NCT6793_ID         0xd120
 #define SIO_NCT6795_ID         0xd350
+#define SIO_NCT6796_ID         0xd420
 #define SIO_ID_MASK            0xFFF0
 
 enum pwm_enable { off, manual, thermal_cruise, speed_cruise, sf3, sf4 };
@@ -201,7 +204,7 @@ superio_exit(int ioreg)
 #define NUM_REG_ALARM  7       /* Max number of alarm registers */
 #define NUM_REG_BEEP   5       /* Max number of beep registers */
 
-#define NUM_FAN                6
+#define NUM_FAN                7
 
 #define TEMP_SOURCE_VIRTUAL    0x1f
 
@@ -272,26 +275,26 @@ static const u8 NCT6775_PWM_MODE_MASK[] = { 0x01, 0x02, 0x01 };
 /* Advanced Fan control, some values are common for all fans */
 
 static const u16 NCT6775_REG_TARGET[] = {
-       0x101, 0x201, 0x301, 0x801, 0x901, 0xa01 };
+       0x101, 0x201, 0x301, 0x801, 0x901, 0xa01, 0xb01 };
 static const u16 NCT6775_REG_FAN_MODE[] = {
-       0x102, 0x202, 0x302, 0x802, 0x902, 0xa02 };
+       0x102, 0x202, 0x302, 0x802, 0x902, 0xa02, 0xb02 };
 static const u16 NCT6775_REG_FAN_STEP_DOWN_TIME[] = {
-       0x103, 0x203, 0x303, 0x803, 0x903, 0xa03 };
+       0x103, 0x203, 0x303, 0x803, 0x903, 0xa03, 0xb03 };
 static const u16 NCT6775_REG_FAN_STEP_UP_TIME[] = {
-       0x104, 0x204, 0x304, 0x804, 0x904, 0xa04 };
+       0x104, 0x204, 0x304, 0x804, 0x904, 0xa04, 0xb04 };
 static const u16 NCT6775_REG_FAN_STOP_OUTPUT[] = {
-       0x105, 0x205, 0x305, 0x805, 0x905, 0xa05 };
+       0x105, 0x205, 0x305, 0x805, 0x905, 0xa05, 0xb05 };
 static const u16 NCT6775_REG_FAN_START_OUTPUT[] = {
-       0x106, 0x206, 0x306, 0x806, 0x906, 0xa06 };
+       0x106, 0x206, 0x306, 0x806, 0x906, 0xa06, 0xb06 };
 static const u16 NCT6775_REG_FAN_MAX_OUTPUT[] = { 0x10a, 0x20a, 0x30a };
 static const u16 NCT6775_REG_FAN_STEP_OUTPUT[] = { 0x10b, 0x20b, 0x30b };
 
 static const u16 NCT6775_REG_FAN_STOP_TIME[] = {
-       0x107, 0x207, 0x307, 0x807, 0x907, 0xa07 };
+       0x107, 0x207, 0x307, 0x807, 0x907, 0xa07, 0xb07 };
 static const u16 NCT6775_REG_PWM[] = {
-       0x109, 0x209, 0x309, 0x809, 0x909, 0xa09 };
+       0x109, 0x209, 0x309, 0x809, 0x909, 0xa09, 0xb09 };
 static const u16 NCT6775_REG_PWM_READ[] = {
-       0x01, 0x03, 0x11, 0x13, 0x15, 0xa09 };
+       0x01, 0x03, 0x11, 0x13, 0x15, 0xa09, 0xb09 };
 
 static const u16 NCT6775_REG_FAN[] = { 0x630, 0x632, 0x634, 0x636, 0x638 };
 static const u16 NCT6775_REG_FAN_MIN[] = { 0x3b, 0x3c, 0x3d };
@@ -314,7 +317,7 @@ static const u16 NCT6775_REG_TEMP_SOURCE[ARRAY_SIZE(NCT6775_REG_TEMP)] = {
        0x621, 0x622, 0x623, 0x624, 0x625, 0x626 };
 
 static const u16 NCT6775_REG_TEMP_SEL[] = {
-       0x100, 0x200, 0x300, 0x800, 0x900, 0xa00 };
+       0x100, 0x200, 0x300, 0x800, 0x900, 0xa00, 0xb00 };
 
 static const u16 NCT6775_REG_WEIGHT_TEMP_SEL[] = {
        0x139, 0x239, 0x339, 0x839, 0x939, 0xa39 };
@@ -330,9 +333,9 @@ static const u16 NCT6775_REG_WEIGHT_TEMP_BASE[] = {
 static const u16 NCT6775_REG_TEMP_OFFSET[] = { 0x454, 0x455, 0x456 };
 
 static const u16 NCT6775_REG_AUTO_TEMP[] = {
-       0x121, 0x221, 0x321, 0x821, 0x921, 0xa21 };
+       0x121, 0x221, 0x321, 0x821, 0x921, 0xa21, 0xb21 };
 static const u16 NCT6775_REG_AUTO_PWM[] = {
-       0x127, 0x227, 0x327, 0x827, 0x927, 0xa27 };
+       0x127, 0x227, 0x327, 0x827, 0x927, 0xa27, 0xb27 };
 
 #define NCT6775_AUTO_TEMP(data, nr, p) ((data)->REG_AUTO_TEMP[nr] + (p))
 #define NCT6775_AUTO_PWM(data, nr, p)  ((data)->REG_AUTO_PWM[nr] + (p))
@@ -340,9 +343,9 @@ static const u16 NCT6775_REG_AUTO_PWM[] = {
 static const u16 NCT6775_REG_CRITICAL_ENAB[] = { 0x134, 0x234, 0x334 };
 
 static const u16 NCT6775_REG_CRITICAL_TEMP[] = {
-       0x135, 0x235, 0x335, 0x835, 0x935, 0xa35 };
+       0x135, 0x235, 0x335, 0x835, 0x935, 0xa35, 0xb35 };
 static const u16 NCT6775_REG_CRITICAL_TEMP_TOLERANCE[] = {
-       0x138, 0x238, 0x338, 0x838, 0x938, 0xa38 };
+       0x138, 0x238, 0x338, 0x838, 0x938, 0xa38, 0xb38 };
 
 static const char *const nct6775_temp_label[] = {
        "",
@@ -414,13 +417,15 @@ static const s8 NCT6776_BEEP_BITS[] = {
        30, 31 };                       /* intrusion0, intrusion1 */
 
 static const u16 NCT6776_REG_TOLERANCE_H[] = {
-       0x10c, 0x20c, 0x30c, 0x80c, 0x90c, 0xa0c };
+       0x10c, 0x20c, 0x30c, 0x80c, 0x90c, 0xa0c, 0xb0c };
 
 static const u8 NCT6776_REG_PWM_MODE[] = { 0x04, 0, 0, 0, 0, 0 };
 static const u8 NCT6776_PWM_MODE_MASK[] = { 0x01, 0, 0, 0, 0, 0 };
 
-static const u16 NCT6776_REG_FAN_MIN[] = { 0x63a, 0x63c, 0x63e, 0x640, 0x642 };
-static const u16 NCT6776_REG_FAN_PULSES[] = { 0x644, 0x645, 0x646, 0, 0 };
+static const u16 NCT6776_REG_FAN_MIN[] = {
+       0x63a, 0x63c, 0x63e, 0x640, 0x642, 0x64a, 0x64c };
+static const u16 NCT6776_REG_FAN_PULSES[] = {
+       0x644, 0x645, 0x646, 0x647, 0x648, 0x649, 0 };
 
 static const u16 NCT6776_REG_WEIGHT_DUTY_BASE[] = {
        0x13e, 0x23e, 0x33e, 0x83e, 0x93e, 0xa3e };
@@ -495,15 +500,15 @@ static const s8 NCT6779_BEEP_BITS[] = {
        30, 31 };                       /* intrusion0, intrusion1 */
 
 static const u16 NCT6779_REG_FAN[] = {
-       0x4b0, 0x4b2, 0x4b4, 0x4b6, 0x4b8, 0x4ba };
+       0x4b0, 0x4b2, 0x4b4, 0x4b6, 0x4b8, 0x4ba, 0x660 };
 static const u16 NCT6779_REG_FAN_PULSES[] = {
-       0x644, 0x645, 0x646, 0x647, 0x648, 0x649 };
+       0x644, 0x645, 0x646, 0x647, 0x648, 0x649, 0 };
 
 static const u16 NCT6779_REG_CRITICAL_PWM_ENABLE[] = {
-       0x136, 0x236, 0x336, 0x836, 0x936, 0xa36 };
+       0x136, 0x236, 0x336, 0x836, 0x936, 0xa36, 0xb36 };
 #define NCT6779_CRITICAL_PWM_ENABLE_MASK       0x01
 static const u16 NCT6779_REG_CRITICAL_PWM[] = {
-       0x137, 0x237, 0x337, 0x837, 0x937, 0xa37 };
+       0x137, 0x237, 0x337, 0x837, 0x937, 0xa37, 0xb37 };
 
 static const u16 NCT6779_REG_TEMP[] = { 0x27, 0x150 };
 static const u16 NCT6779_REG_TEMP_MON[] = { 0x73, 0x75, 0x77, 0x79, 0x7b };
@@ -570,12 +575,12 @@ static const u16 NCT6779_REG_TEMP_CRIT[32] = {
 
 #define NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE    0x28
 
-static const u16 NCT6791_REG_WEIGHT_TEMP_SEL[6] = { 0, 0x239 };
-static const u16 NCT6791_REG_WEIGHT_TEMP_STEP[6] = { 0, 0x23a };
-static const u16 NCT6791_REG_WEIGHT_TEMP_STEP_TOL[6] = { 0, 0x23b };
-static const u16 NCT6791_REG_WEIGHT_DUTY_STEP[6] = { 0, 0x23c };
-static const u16 NCT6791_REG_WEIGHT_TEMP_BASE[6] = { 0, 0x23d };
-static const u16 NCT6791_REG_WEIGHT_DUTY_BASE[6] = { 0, 0x23e };
+static const u16 NCT6791_REG_WEIGHT_TEMP_SEL[NUM_FAN] = { 0, 0x239 };
+static const u16 NCT6791_REG_WEIGHT_TEMP_STEP[NUM_FAN] = { 0, 0x23a };
+static const u16 NCT6791_REG_WEIGHT_TEMP_STEP_TOL[NUM_FAN] = { 0, 0x23b };
+static const u16 NCT6791_REG_WEIGHT_DUTY_STEP[NUM_FAN] = { 0, 0x23c };
+static const u16 NCT6791_REG_WEIGHT_TEMP_BASE[NUM_FAN] = { 0, 0x23d };
+static const u16 NCT6791_REG_WEIGHT_DUTY_BASE[NUM_FAN] = { 0, 0x23e };
 
 static const u16 NCT6791_REG_ALARM[NUM_REG_ALARM] = {
        0x459, 0x45A, 0x45B, 0x568, 0x45D };
@@ -707,6 +712,43 @@ static const char *const nct6795_temp_label[] = {
 
 #define NCT6795_TEMP_MASK      0xbfffff7e
 
+static const char *const nct6796_temp_label[] = {
+       "",
+       "SYSTIN",
+       "CPUTIN",
+       "AUXTIN0",
+       "AUXTIN1",
+       "AUXTIN2",
+       "AUXTIN3",
+       "AUXTIN4",
+       "SMBUSMASTER 0",
+       "SMBUSMASTER 1",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "",
+       "PECI Agent 0",
+       "PECI Agent 1",
+       "PCH_CHIP_CPU_MAX_TEMP",
+       "PCH_CHIP_TEMP",
+       "PCH_CPU_TEMP",
+       "PCH_MCH_TEMP",
+       "PCH_DIM0_TEMP",
+       "PCH_DIM1_TEMP",
+       "PCH_DIM2_TEMP",
+       "PCH_DIM3_TEMP",
+       "BYTE_TEMP0",
+       "BYTE_TEMP1",
+       "PECI Agent 0 Calibration",
+       "PECI Agent 1 Calibration",
+       "",
+       "Virtual_TEMP"
+};
+
+#define NCT6796_TEMP_MASK      0xbfff03fe
+
 /* NCT6102D/NCT6106D specific data */
 
 #define NCT6106_REG_VBAT       0x318
@@ -1231,11 +1273,13 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg)
        case nct6792:
        case nct6793:
        case nct6795:
+       case nct6796:
                return reg == 0x150 || reg == 0x153 || reg == 0x155 ||
                  ((reg & 0xfff0) == 0x4b0 && (reg & 0x000f) < 0x0b) ||
                  reg == 0x402 ||
                  reg == 0x63a || reg == 0x63c || reg == 0x63e ||
-                 reg == 0x640 || reg == 0x642 ||
+                 reg == 0x640 || reg == 0x642 || reg == 0x64a ||
+                 reg == 0x64c || reg == 0x660 ||
                  reg == 0x73 || reg == 0x75 || reg == 0x77 || reg == 0x79 ||
                  reg == 0x7b || reg == 0x7d;
        }
@@ -1469,7 +1513,7 @@ static void nct6775_update_pwm(struct device *dev)
                duty_is_dc = data->REG_PWM_MODE[i] &&
                  (nct6775_read_value(data, data->REG_PWM_MODE[i])
                   & data->PWM_MODE_MASK[i]);
-               data->pwm_mode[i] = duty_is_dc;
+               data->pwm_mode[i] = !duty_is_dc;
 
                fanmodecfg = nct6775_read_value(data, data->REG_FAN_MODE[i]);
                for (j = 0; j < ARRAY_SIZE(data->REG_PWM); j++) {
@@ -1584,6 +1628,7 @@ static void nct6775_update_pwm_limits(struct device *dev)
                case nct6792:
                case nct6793:
                case nct6795:
+               case nct6796:
                        reg = nct6775_read_value(data,
                                        data->REG_CRITICAL_PWM_ENABLE[i]);
                        if (reg & data->CRITICAL_PWM_ENABLE_MASK)
@@ -2092,6 +2137,8 @@ static umode_t nct6775_fan_is_visible(struct kobject *kobj,
                return 0;
        if (nr == 2 && data->BEEP_BITS[FAN_ALARM_BASE + fan] == -1)
                return 0;
+       if (nr == 3 && !data->REG_FAN_PULSES[fan])
+               return 0;
        if (nr == 4 && !(data->has_fan_min & BIT(fan)))
                return 0;
        if (nr == 5 && data->kind != nct6775)
@@ -2350,7 +2397,7 @@ show_pwm_mode(struct device *dev, struct device_attribute *attr, char *buf)
        struct nct6775_data *data = nct6775_update_device(dev);
        struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
 
-       return sprintf(buf, "%d\n", !data->pwm_mode[sattr->index]);
+       return sprintf(buf, "%d\n", data->pwm_mode[sattr->index]);
 }
 
 static ssize_t
@@ -2371,9 +2418,9 @@ store_pwm_mode(struct device *dev, struct device_attribute *attr,
        if (val > 1)
                return -EINVAL;
 
-       /* Setting DC mode is not supported for all chips/channels */
+       /* Setting DC mode (0) is not supported for all chips/channels */
        if (data->REG_PWM_MODE[nr] == 0) {
-               if (val)
+               if (!val)
                        return -EINVAL;
                return count;
        }
@@ -2382,7 +2429,7 @@ store_pwm_mode(struct device *dev, struct device_attribute *attr,
        data->pwm_mode[nr] = val;
        reg = nct6775_read_value(data, data->REG_PWM_MODE[nr]);
        reg &= ~data->PWM_MODE_MASK[nr];
-       if (val)
+       if (!val)
                reg |= data->PWM_MODE_MASK[nr];
        nct6775_write_value(data, data->REG_PWM_MODE[nr], reg);
        mutex_unlock(&data->update_lock);
@@ -3004,6 +3051,7 @@ store_auto_pwm(struct device *dev, struct device_attribute *attr,
                case nct6792:
                case nct6793:
                case nct6795:
+               case nct6796:
                        nct6775_write_value(data, data->REG_CRITICAL_PWM[nr],
                                            val);
                        reg = nct6775_read_value(data,
@@ -3358,8 +3406,10 @@ static inline void nct6775_init_device(struct nct6775_data *data)
 static void
 nct6775_check_fan_inputs(struct nct6775_data *data)
 {
-       bool fan3pin, fan4pin, fan4min, fan5pin, fan6pin;
-       bool pwm3pin, pwm4pin, pwm5pin, pwm6pin;
+       bool fan3pin = false, fan4pin = false, fan4min = false;
+       bool fan5pin = false, fan6pin = false, fan7pin = false;
+       bool pwm3pin = false, pwm4pin = false, pwm5pin = false;
+       bool pwm6pin = false, pwm7pin = false;
        int sioreg = data->sioreg;
        int regval;
 
@@ -3376,12 +3426,6 @@ nct6775_check_fan_inputs(struct nct6775_data *data)
 
                /* On NCT6775, fan4 shares pins with the fdc interface */
                fan4pin = !(superio_inb(sioreg, 0x2A) & 0x80);
-               fan4min = false;
-               fan5pin = false;
-               fan6pin = false;
-               pwm4pin = false;
-               pwm5pin = false;
-               pwm6pin = false;
        } else if (data->kind == nct6776) {
                bool gpok = superio_inb(sioreg, 0x27) & 0x80;
                const char *board_vendor, *board_name;
@@ -3421,25 +3465,15 @@ nct6775_check_fan_inputs(struct nct6775_data *data)
                        fan5pin = superio_inb(sioreg, 0x1C) & 0x02;
 
                fan4min = fan4pin;
-               fan6pin = false;
                pwm3pin = fan3pin;
-               pwm4pin = false;
-               pwm5pin = false;
-               pwm6pin = false;
        } else if (data->kind == nct6106) {
                regval = superio_inb(sioreg, 0x24);
                fan3pin = !(regval & 0x80);
                pwm3pin = regval & 0x08;
-
-               fan4pin = false;
-               fan4min = false;
-               fan5pin = false;
-               fan6pin = false;
-               pwm4pin = false;
-               pwm5pin = false;
-               pwm6pin = false;
-       } else { /* NCT6779D, NCT6791D, NCT6792D, NCT6793D, or NCT6795D */
-               int regval_1b, regval_2a, regval_eb;
+       } else {
+               /* NCT6779D, NCT6791D, NCT6792D, NCT6793D, NCT6795D, NCT6796D */
+               int regval_1b, regval_2a, regval_2f;
+               bool dsw_en;
 
                regval = superio_inb(sioreg, 0x1c);
 
@@ -3460,31 +3494,60 @@ nct6775_check_fan_inputs(struct nct6775_data *data)
                        break;
                case nct6793:
                case nct6795:
+               case nct6796:
                        regval_1b = superio_inb(sioreg, 0x1b);
                        regval_2a = superio_inb(sioreg, 0x2a);
+                       regval_2f = superio_inb(sioreg, 0x2f);
+                       dsw_en = regval_2f & BIT(3);
 
                        if (!pwm5pin)
                                pwm5pin = regval & BIT(7);
-                       fan6pin = regval & BIT(1);
-                       pwm6pin = regval & BIT(0);
+
                        if (!fan5pin)
                                fan5pin = regval_1b & BIT(5);
 
                        superio_select(sioreg, NCT6775_LD_12);
-                       regval_eb = superio_inb(sioreg, 0xeb);
-                       if (!fan5pin)
-                               fan5pin = regval_eb & BIT(5);
-                       if (!pwm5pin)
-                               pwm5pin = (regval_eb & BIT(4)) &&
-                                          !(regval_2a & BIT(0));
-                       if (!fan6pin)
-                               fan6pin = regval_eb & BIT(3);
-                       if (!pwm6pin)
-                               pwm6pin = regval_eb & BIT(2);
+                       if (data->kind != nct6796) {
+                               int regval_eb = superio_inb(sioreg, 0xeb);
+
+                               if (!dsw_en) {
+                                       fan6pin = regval & BIT(1);
+                                       pwm6pin = regval & BIT(0);
+                               }
+
+                               if (!fan5pin)
+                                       fan5pin = regval_eb & BIT(5);
+                               if (!pwm5pin)
+                                       pwm5pin = (regval_eb & BIT(4)) &&
+                                               !(regval_2a & BIT(0));
+                               if (!fan6pin)
+                                       fan6pin = regval_eb & BIT(3);
+                               if (!pwm6pin)
+                                       pwm6pin = regval_eb & BIT(2);
+                       }
+
+                       if (data->kind == nct6795 || data->kind == nct6796) {
+                               int regval_ed = superio_inb(sioreg, 0xed);
+
+                               if (!fan6pin)
+                                       fan6pin = (regval_2a & BIT(4)) &&
+                                         (!dsw_en ||
+                                          (dsw_en && (regval_ed & BIT(4))));
+                               if (!pwm6pin)
+                                       pwm6pin = (regval_2a & BIT(3)) &&
+                                         (regval_ed & BIT(2));
+                       }
+
+                       if (data->kind == nct6796) {
+                               int regval_1d = superio_inb(sioreg, 0x1d);
+                               int regval_2b = superio_inb(sioreg, 0x2b);
+
+                               fan7pin = !(regval_2b & BIT(2));
+                               pwm7pin = !(regval_1d & (BIT(2) | BIT(3)));
+                       }
+
                        break;
                default:        /* NCT6779D */
-                       fan6pin = false;
-                       pwm6pin = false;
                        break;
                }
 
@@ -3493,11 +3556,11 @@ nct6775_check_fan_inputs(struct nct6775_data *data)
 
        /* fan 1 and 2 (0x03) are always present */
        data->has_fan = 0x03 | (fan3pin << 2) | (fan4pin << 3) |
-               (fan5pin << 4) | (fan6pin << 5);
+               (fan5pin << 4) | (fan6pin << 5) | (fan7pin << 6);
        data->has_fan_min = 0x03 | (fan3pin << 2) | (fan4min << 3) |
-               (fan5pin << 4);
+               (fan5pin << 4) | (fan6pin << 5) | (fan7pin << 6);
        data->has_pwm = 0x03 | (pwm3pin << 2) | (pwm4pin << 3) |
-               (pwm5pin << 4) | (pwm6pin << 5);
+               (pwm5pin << 4) | (pwm6pin << 5) | (pwm7pin << 6);
 }
 
 static void add_temp_sensors(struct nct6775_data *data, const u16 *regp,
@@ -3856,8 +3919,9 @@ static int nct6775_probe(struct platform_device *pdev)
        case nct6792:
        case nct6793:
        case nct6795:
+       case nct6796:
                data->in_num = 15;
-               data->pwm_num = 6;
+               data->pwm_num = (data->kind == nct6796) ? 7 : 6;
                data->auto_pwm_num = 4;
                data->has_fan_div = false;
                data->temp_fixed_num = 6;
@@ -3891,6 +3955,10 @@ static int nct6775_probe(struct platform_device *pdev)
                        data->temp_label = nct6795_temp_label;
                        data->temp_mask = NCT6795_TEMP_MASK;
                        break;
+               case nct6796:
+                       data->temp_label = nct6796_temp_label;
+                       data->temp_mask = NCT6796_TEMP_MASK;
+                       break;
                }
 
                data->REG_CONFIG = NCT6775_REG_CONFIG;
@@ -4159,6 +4227,7 @@ static int nct6775_probe(struct platform_device *pdev)
        case nct6792:
        case nct6793:
        case nct6795:
+       case nct6796:
                break;
        }
 
@@ -4193,6 +4262,7 @@ static int nct6775_probe(struct platform_device *pdev)
                case nct6792:
                case nct6793:
                case nct6795:
+               case nct6796:
                        tmp |= 0x7e;
                        break;
                }
@@ -4291,7 +4361,8 @@ static int __maybe_unused nct6775_resume(struct device *dev)
                superio_outb(sioreg, SIO_REG_ENABLE, data->sio_reg_enable);
 
        if (data->kind == nct6791 || data->kind == nct6792 ||
-           data->kind == nct6793 || data->kind == nct6795)
+           data->kind == nct6793 || data->kind == nct6795 ||
+           data->kind == nct6796)
                nct6791_enable_io_mapping(sioreg);
 
        superio_exit(sioreg);
@@ -4391,6 +4462,9 @@ static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data)
        case SIO_NCT6795_ID:
                sio_data->kind = nct6795;
                break;
+       case SIO_NCT6796_ID:
+               sio_data->kind = nct6796;
+               break;
        default:
                if (val != 0xffff)
                        pr_debug("unsupported chip ID: 0x%04x\n", val);
@@ -4417,7 +4491,8 @@ static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data)
        }
 
        if (sio_data->kind == nct6791 || sio_data->kind == nct6792 ||
-           sio_data->kind == nct6793 || sio_data->kind == nct6795)
+           sio_data->kind == nct6793 || sio_data->kind == nct6795 ||
+           sio_data->kind == nct6796)
                nct6791_enable_io_mapping(sioaddr);
 
        superio_exit(sioaddr);
index 6e4298e99222552514ed05c4cd5b53ab8fd904fd..e71aec69e76ef366330f4d147984e8e65072ec29 100644 (file)
@@ -31,8 +31,8 @@ config SENSORS_ADM1275
        default n
        help
          If you say yes here you get hardware monitoring support for Analog
-         Devices ADM1075, ADM1275, ADM1276, ADM1278, ADM1293, and ADM1294
-         Hot-Swap Controller and Digital Power Monitors.
+         Devices ADM1075, ADM1272, ADM1275, ADM1276, ADM1278, ADM1293,
+         and ADM1294 Hot-Swap Controller and Digital Power Monitors.
 
          This driver can also be built as a module. If so, the module will
          be called adm1275.
index 00d6995af4c23c50ea3e0de261bc641bd2ba88ee..13600fa79e7f369a5b74e774d072feff255d5ab4 100644 (file)
@@ -3,6 +3,7 @@
  * and Digital Power Monitor
  *
  * Copyright (c) 2011 Ericsson AB.
+ * Copyright (c) 2018 Guenter Roeck
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -24,7 +25,7 @@
 #include <linux/bitops.h>
 #include "pmbus.h"
 
-enum chips { adm1075, adm1275, adm1276, adm1278, adm1293, adm1294 };
+enum chips { adm1075, adm1272, adm1275, adm1276, adm1278, adm1293, adm1294 };
 
 #define ADM1275_MFR_STATUS_IOUT_WARN2  BIT(0)
 #define ADM1293_MFR_STATUS_VAUX_UV_WARN        BIT(5)
@@ -41,6 +42,8 @@ enum chips { adm1075, adm1275, adm1276, adm1278, adm1293, adm1294 };
 #define ADM1075_IRANGE_25              BIT(3)
 #define ADM1075_IRANGE_MASK            (BIT(3) | BIT(4))
 
+#define ADM1272_IRANGE                 BIT(0)
+
 #define ADM1278_TEMP1_EN               BIT(3)
 #define ADM1278_VIN_EN                 BIT(2)
 #define ADM1278_VOUT_EN                        BIT(1)
@@ -105,6 +108,19 @@ static const struct coefficients adm1075_coefficients[] = {
        [4] = { 4279, 0, -1 },          /* power, irange50 */
 };
 
+static const struct coefficients adm1272_coefficients[] = {
+       [0] = { 6770, 0, -2 },          /* voltage, vrange 60V */
+       [1] = { 4062, 0, -2 },          /* voltage, vrange 100V */
+       [2] = { 1326, 20480, -1 },      /* current, vsense range 15mV */
+       [3] = { 663, 20480, -1 },       /* current, vsense range 30mV */
+       [4] = { 3512, 0, -2 },          /* power, vrange 60V, irange 15mV */
+       [5] = { 21071, 0, -3 },         /* power, vrange 100V, irange 15mV */
+       [6] = { 17561, 0, -3 },         /* power, vrange 60V, irange 30mV */
+       [7] = { 10535, 0, -3 },         /* power, vrange 100V, irange 30mV */
+       [8] = { 42, 31871, -1 },        /* temperature */
+
+};
+
 static const struct coefficients adm1275_coefficients[] = {
        [0] = { 19199, 0, -2 },         /* voltage, vrange set */
        [1] = { 6720, 0, -1 },          /* voltage, vrange not set */
@@ -154,7 +170,7 @@ static int adm1275_read_word_data(struct i2c_client *client, int page, int reg)
        const struct adm1275_data *data = to_adm1275_data(info);
        int ret = 0;
 
-       if (page)
+       if (page > 0)
                return -ENXIO;
 
        switch (reg) {
@@ -240,7 +256,7 @@ static int adm1275_write_word_data(struct i2c_client *client, int page, int reg,
        const struct adm1275_data *data = to_adm1275_data(info);
        int ret;
 
-       if (page)
+       if (page > 0)
                return -ENXIO;
 
        switch (reg) {
@@ -335,6 +351,7 @@ static int adm1275_read_byte_data(struct i2c_client *client, int page, int reg)
 
 static const struct i2c_device_id adm1275_id[] = {
        { "adm1075", adm1075 },
+       { "adm1272", adm1272 },
        { "adm1275", adm1275 },
        { "adm1276", adm1276 },
        { "adm1278", adm1278 },
@@ -451,6 +468,54 @@ static int adm1275_probe(struct i2c_client *client,
                        info->func[0] |=
                          PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT;
                break;
+       case adm1272:
+               data->have_vout = true;
+               data->have_pin_max = true;
+               data->have_temp_max = true;
+
+               coefficients = adm1272_coefficients;
+               vindex = (config & ADM1275_VRANGE) ? 1 : 0;
+               cindex = (config & ADM1272_IRANGE) ? 3 : 2;
+               /* pindex depends on the combination of the above */
+               switch (config & (ADM1275_VRANGE | ADM1272_IRANGE)) {
+               case 0:
+               default:
+                       pindex = 4;
+                       break;
+               case ADM1275_VRANGE:
+                       pindex = 5;
+                       break;
+               case ADM1272_IRANGE:
+                       pindex = 6;
+                       break;
+               case ADM1275_VRANGE | ADM1272_IRANGE:
+                       pindex = 7;
+                       break;
+               }
+               tindex = 8;
+
+               info->func[0] |= PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT |
+                       PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT;
+
+               /* Enable VOUT if not enabled (it is disabled by default) */
+               if (!(config & ADM1278_VOUT_EN)) {
+                       config |= ADM1278_VOUT_EN;
+                       ret = i2c_smbus_write_byte_data(client,
+                                                       ADM1275_PMON_CONFIG,
+                                                       config);
+                       if (ret < 0) {
+                               dev_err(&client->dev,
+                                       "Failed to enable VOUT monitoring\n");
+                               return -ENODEV;
+                       }
+               }
+
+               if (config & ADM1278_TEMP1_EN)
+                       info->func[0] |=
+                               PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP;
+               if (config & ADM1278_VIN_EN)
+                       info->func[0] |= PMBUS_HAVE_VIN;
+               break;
        case adm1275:
                if (device_config & ADM1275_IOUT_WARN2_SELECT)
                        data->have_oc_fault = true;
index dd4883a19045601fcccac2cebea02d987a728e02..e951f9b87abb0cd4caeb854bd0936f6175657e30 100644 (file)
@@ -45,7 +45,7 @@ static int max8688_read_word_data(struct i2c_client *client, int page, int reg)
 {
        int ret;
 
-       if (page)
+       if (page > 0)
                return -ENXIO;
 
        switch (reg) {
index b74dbeca2e8d89908b31c173f6e8d0ec2faaceb5..70cecb06f93cae04f52f8cd3ff02a3a327d406ef 100644 (file)
@@ -19,6 +19,7 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include <linux/debugfs.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
@@ -27,6 +28,8 @@
 #include <linux/slab.h>
 #include <linux/i2c.h>
 #include <linux/pmbus.h>
+#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
 #include "pmbus.h"
 
 enum chips { ucd9000, ucd90120, ucd90124, ucd90160, ucd9090, ucd90910 };
@@ -35,8 +38,19 @@ enum chips { ucd9000, ucd90120, ucd90124, ucd90160, ucd9090, ucd90910 };
 #define UCD9000_NUM_PAGES              0xd6
 #define UCD9000_FAN_CONFIG_INDEX       0xe7
 #define UCD9000_FAN_CONFIG             0xe8
+#define UCD9000_MFR_STATUS             0xf3
+#define UCD9000_GPIO_SELECT            0xfa
+#define UCD9000_GPIO_CONFIG            0xfb
 #define UCD9000_DEVICE_ID              0xfd
 
+/* GPIO CONFIG bits */
+#define UCD9000_GPIO_CONFIG_ENABLE     BIT(0)
+#define UCD9000_GPIO_CONFIG_OUT_ENABLE BIT(1)
+#define UCD9000_GPIO_CONFIG_OUT_VALUE  BIT(2)
+#define UCD9000_GPIO_CONFIG_STATUS     BIT(3)
+#define UCD9000_GPIO_INPUT             0
+#define UCD9000_GPIO_OUTPUT            1
+
 #define UCD9000_MON_TYPE(x)    (((x) >> 5) & 0x07)
 #define UCD9000_MON_PAGE(x)    ((x) & 0x0f)
 
@@ -47,12 +61,29 @@ enum chips { ucd9000, ucd90120, ucd90124, ucd90160, ucd9090, ucd90910 };
 
 #define UCD9000_NUM_FAN                4
 
+#define UCD9000_GPIO_NAME_LEN  16
+#define UCD9090_NUM_GPIOS      23
+#define UCD901XX_NUM_GPIOS     26
+#define UCD90910_NUM_GPIOS     26
+
+#define UCD9000_DEBUGFS_NAME_LEN       24
+#define UCD9000_GPI_COUNT              8
+
 struct ucd9000_data {
        u8 fan_data[UCD9000_NUM_FAN][I2C_SMBUS_BLOCK_MAX];
        struct pmbus_driver_info info;
+#ifdef CONFIG_GPIOLIB
+       struct gpio_chip gpio;
+#endif
+       struct dentry *debugfs;
 };
 #define to_ucd9000_data(_info) container_of(_info, struct ucd9000_data, info)
 
+struct ucd9000_debugfs_entry {
+       struct i2c_client *client;
+       u8 index;
+};
+
 static int ucd9000_get_fan_config(struct i2c_client *client, int fan)
 {
        int fan_config = 0;
@@ -149,6 +180,312 @@ static const struct of_device_id ucd9000_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, ucd9000_of_match);
 
+#ifdef CONFIG_GPIOLIB
+static int ucd9000_gpio_read_config(struct i2c_client *client,
+                                   unsigned int offset)
+{
+       int ret;
+
+       /* No page set required */
+       ret = i2c_smbus_write_byte_data(client, UCD9000_GPIO_SELECT, offset);
+       if (ret < 0)
+               return ret;
+
+       return i2c_smbus_read_byte_data(client, UCD9000_GPIO_CONFIG);
+}
+
+static int ucd9000_gpio_get(struct gpio_chip *gc, unsigned int offset)
+{
+       struct i2c_client *client  = gpiochip_get_data(gc);
+       int ret;
+
+       ret = ucd9000_gpio_read_config(client, offset);
+       if (ret < 0)
+               return ret;
+
+       return !!(ret & UCD9000_GPIO_CONFIG_STATUS);
+}
+
+static void ucd9000_gpio_set(struct gpio_chip *gc, unsigned int offset,
+                            int value)
+{
+       struct i2c_client *client = gpiochip_get_data(gc);
+       int ret;
+
+       ret = ucd9000_gpio_read_config(client, offset);
+       if (ret < 0) {
+               dev_dbg(&client->dev, "failed to read GPIO %d config: %d\n",
+                       offset, ret);
+               return;
+       }
+
+       if (value) {
+               if (ret & UCD9000_GPIO_CONFIG_STATUS)
+                       return;
+
+               ret |= UCD9000_GPIO_CONFIG_STATUS;
+       } else {
+               if (!(ret & UCD9000_GPIO_CONFIG_STATUS))
+                       return;
+
+               ret &= ~UCD9000_GPIO_CONFIG_STATUS;
+       }
+
+       ret |= UCD9000_GPIO_CONFIG_ENABLE;
+
+       /* Page set not required */
+       ret = i2c_smbus_write_byte_data(client, UCD9000_GPIO_CONFIG, ret);
+       if (ret < 0) {
+               dev_dbg(&client->dev, "Failed to write GPIO %d config: %d\n",
+                       offset, ret);
+               return;
+       }
+
+       ret &= ~UCD9000_GPIO_CONFIG_ENABLE;
+
+       ret = i2c_smbus_write_byte_data(client, UCD9000_GPIO_CONFIG, ret);
+       if (ret < 0)
+               dev_dbg(&client->dev, "Failed to write GPIO %d config: %d\n",
+                       offset, ret);
+}
+
+static int ucd9000_gpio_get_direction(struct gpio_chip *gc,
+                                     unsigned int offset)
+{
+       struct i2c_client *client = gpiochip_get_data(gc);
+       int ret;
+
+       ret = ucd9000_gpio_read_config(client, offset);
+       if (ret < 0)
+               return ret;
+
+       return !(ret & UCD9000_GPIO_CONFIG_OUT_ENABLE);
+}
+
+static int ucd9000_gpio_set_direction(struct gpio_chip *gc,
+                                     unsigned int offset, bool direction_out,
+                                     int requested_out)
+{
+       struct i2c_client *client = gpiochip_get_data(gc);
+       int ret, config, out_val;
+
+       ret = ucd9000_gpio_read_config(client, offset);
+       if (ret < 0)
+               return ret;
+
+       if (direction_out) {
+               out_val = requested_out ? UCD9000_GPIO_CONFIG_OUT_VALUE : 0;
+
+               if (ret & UCD9000_GPIO_CONFIG_OUT_ENABLE) {
+                       if ((ret & UCD9000_GPIO_CONFIG_OUT_VALUE) == out_val)
+                               return 0;
+               } else {
+                       ret |= UCD9000_GPIO_CONFIG_OUT_ENABLE;
+               }
+
+               if (out_val)
+                       ret |= UCD9000_GPIO_CONFIG_OUT_VALUE;
+               else
+                       ret &= ~UCD9000_GPIO_CONFIG_OUT_VALUE;
+
+       } else {
+               if (!(ret & UCD9000_GPIO_CONFIG_OUT_ENABLE))
+                       return 0;
+
+               ret &= ~UCD9000_GPIO_CONFIG_OUT_ENABLE;
+       }
+
+       ret |= UCD9000_GPIO_CONFIG_ENABLE;
+       config = ret;
+
+       /* Page set not required */
+       ret = i2c_smbus_write_byte_data(client, UCD9000_GPIO_CONFIG, config);
+       if (ret < 0)
+               return ret;
+
+       config &= ~UCD9000_GPIO_CONFIG_ENABLE;
+
+       return i2c_smbus_write_byte_data(client, UCD9000_GPIO_CONFIG, config);
+}
+
+static int ucd9000_gpio_direction_input(struct gpio_chip *gc,
+                                       unsigned int offset)
+{
+       return ucd9000_gpio_set_direction(gc, offset, UCD9000_GPIO_INPUT, 0);
+}
+
+static int ucd9000_gpio_direction_output(struct gpio_chip *gc,
+                                        unsigned int offset, int val)
+{
+       return ucd9000_gpio_set_direction(gc, offset, UCD9000_GPIO_OUTPUT,
+                                         val);
+}
+
+static void ucd9000_probe_gpio(struct i2c_client *client,
+                              const struct i2c_device_id *mid,
+                              struct ucd9000_data *data)
+{
+       int rc;
+
+       switch (mid->driver_data) {
+       case ucd9090:
+               data->gpio.ngpio = UCD9090_NUM_GPIOS;
+               break;
+       case ucd90120:
+       case ucd90124:
+       case ucd90160:
+               data->gpio.ngpio = UCD901XX_NUM_GPIOS;
+               break;
+       case ucd90910:
+               data->gpio.ngpio = UCD90910_NUM_GPIOS;
+               break;
+       default:
+               return; /* GPIO support is optional. */
+       }
+
+       /*
+        * Pinmux support has not been added to the new gpio_chip.
+        * This support should be added when possible given the mux
+        * behavior of these IO devices.
+        */
+       data->gpio.label = client->name;
+       data->gpio.get_direction = ucd9000_gpio_get_direction;
+       data->gpio.direction_input = ucd9000_gpio_direction_input;
+       data->gpio.direction_output = ucd9000_gpio_direction_output;
+       data->gpio.get = ucd9000_gpio_get;
+       data->gpio.set = ucd9000_gpio_set;
+       data->gpio.can_sleep = true;
+       data->gpio.base = -1;
+       data->gpio.parent = &client->dev;
+
+       rc = devm_gpiochip_add_data(&client->dev, &data->gpio, client);
+       if (rc)
+               dev_warn(&client->dev, "Could not add gpiochip: %d\n", rc);
+}
+#else
+static void ucd9000_probe_gpio(struct i2c_client *client,
+                              const struct i2c_device_id *mid,
+                              struct ucd9000_data *data)
+{
+}
+#endif /* CONFIG_GPIOLIB */
+
+#ifdef CONFIG_DEBUG_FS
+static int ucd9000_get_mfr_status(struct i2c_client *client, u8 *buffer)
+{
+       int ret = pmbus_set_page(client, 0);
+
+       if (ret < 0)
+               return ret;
+
+       return i2c_smbus_read_block_data(client, UCD9000_MFR_STATUS, buffer);
+}
+
+static int ucd9000_debugfs_show_mfr_status_bit(void *data, u64 *val)
+{
+       struct ucd9000_debugfs_entry *entry = data;
+       struct i2c_client *client = entry->client;
+       u8 buffer[I2C_SMBUS_BLOCK_MAX];
+       int ret;
+
+       ret = ucd9000_get_mfr_status(client, buffer);
+       if (ret < 0)
+               return ret;
+
+       /*
+        * Attribute only created for devices with gpi fault bits at bits
+        * 16-23, which is the second byte of the response.
+        */
+       *val = !!(buffer[1] & BIT(entry->index));
+
+       return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(ucd9000_debugfs_mfr_status_bit,
+                        ucd9000_debugfs_show_mfr_status_bit, NULL, "%1lld\n");
+
+static ssize_t ucd9000_debugfs_read_mfr_status(struct file *file,
+                                              char __user *buf, size_t count,
+                                              loff_t *ppos)
+{
+       struct i2c_client *client = file->private_data;
+       u8 buffer[I2C_SMBUS_BLOCK_MAX];
+       char str[(I2C_SMBUS_BLOCK_MAX * 2) + 2];
+       char *res;
+       int rc;
+
+       rc = ucd9000_get_mfr_status(client, buffer);
+       if (rc < 0)
+               return rc;
+
+       res = bin2hex(str, buffer, min(rc, I2C_SMBUS_BLOCK_MAX));
+       *res++ = '\n';
+       *res = 0;
+
+       return simple_read_from_buffer(buf, count, ppos, str, res - str);
+}
+
+static const struct file_operations ucd9000_debugfs_show_mfr_status_fops = {
+       .llseek = noop_llseek,
+       .read = ucd9000_debugfs_read_mfr_status,
+       .open = simple_open,
+};
+
+static int ucd9000_init_debugfs(struct i2c_client *client,
+                               const struct i2c_device_id *mid,
+                               struct ucd9000_data *data)
+{
+       struct dentry *debugfs;
+       struct ucd9000_debugfs_entry *entries;
+       int i;
+       char name[UCD9000_DEBUGFS_NAME_LEN];
+
+       debugfs = pmbus_get_debugfs_dir(client);
+       if (!debugfs)
+               return -ENOENT;
+
+       data->debugfs = debugfs_create_dir(client->name, debugfs);
+       if (!data->debugfs)
+               return -ENOENT;
+
+       /*
+        * Of the chips this driver supports, only the UCD9090, UCD90160,
+        * and UCD90910 report GPI faults in their MFR_STATUS register, so only
+        * create the GPI fault debugfs attributes for those chips.
+        */
+       if (mid->driver_data == ucd9090 || mid->driver_data == ucd90160 ||
+           mid->driver_data == ucd90910) {
+               entries = devm_kzalloc(&client->dev,
+                                      sizeof(*entries) * UCD9000_GPI_COUNT,
+                                      GFP_KERNEL);
+               if (!entries)
+                       return -ENOMEM;
+
+               for (i = 0; i < UCD9000_GPI_COUNT; i++) {
+                       entries[i].client = client;
+                       entries[i].index = i;
+                       scnprintf(name, UCD9000_DEBUGFS_NAME_LEN,
+                                 "gpi%d_alarm", i + 1);
+                       debugfs_create_file(name, 0444, data->debugfs,
+                                           &entries[i],
+                                           &ucd9000_debugfs_mfr_status_bit);
+               }
+       }
+
+       scnprintf(name, UCD9000_DEBUGFS_NAME_LEN, "mfr_status");
+       debugfs_create_file(name, 0444, data->debugfs, client,
+                           &ucd9000_debugfs_show_mfr_status_fops);
+
+       return 0;
+}
+#else
+static int ucd9000_init_debugfs(struct i2c_client *client,
+                               const struct i2c_device_id *mid,
+                               struct ucd9000_data *data)
+{
+       return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
+
 static int ucd9000_probe(struct i2c_client *client,
                         const struct i2c_device_id *id)
 {
@@ -263,7 +600,18 @@ static int ucd9000_probe(struct i2c_client *client,
                  | PMBUS_HAVE_FAN34 | PMBUS_HAVE_STATUS_FAN34;
        }
 
-       return pmbus_do_probe(client, mid, info);
+       ucd9000_probe_gpio(client, mid, data);
+
+       ret = pmbus_do_probe(client, mid, info);
+       if (ret)
+               return ret;
+
+       ret = ucd9000_init_debugfs(client, mid, data);
+       if (ret)
+               dev_warn(&client->dev, "Failed to register debugfs: %d\n",
+                        ret);
+
+       return 0;
 }
 
 /* This is the driver that will be inserted */
index 190e7b39ce32ff10f844769bf3ab2d11ece68f79..2c7ba70921f5850da4bc9caffe091bf81637c2ca 100644 (file)
@@ -16,8 +16,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA
  *
- * Data sheet available (5/2010) at
- * http://www.sensirion.com/en/pdf/product_information/Datasheet-humidity-sensor-SHT21.pdf
+ * Data sheet available at http://www.sensirion.com/file/datasheet_sht21
  */
 
 #include <linux/module.h>
index 07a0cb0a1f284fe62de02f84d43c3f30589c3f0a..0e81f287d3051930b5edc4d459d8957b05bfa979 100644 (file)
@@ -136,20 +136,24 @@ static int via_cputemp_probe(struct platform_device *pdev)
        data->id = pdev->id;
        data->name = "via_cputemp";
 
-       switch (c->x86_model) {
-       case 0xA:
-               /* C7 A */
-       case 0xD:
-               /* C7 D */
-               data->msr_temp = 0x1169;
-               data->msr_vid = 0x198;
-               break;
-       case 0xF:
-               /* Nano */
+       if (c->x86 == 7) {
                data->msr_temp = 0x1423;
-               break;
-       default:
-               return -ENODEV;
+       } else {
+               switch (c->x86_model) {
+               case 0xA:
+                       /* C7 A */
+               case 0xD:
+                       /* C7 D */
+                       data->msr_temp = 0x1169;
+                       data->msr_vid = 0x198;
+                       break;
+               case 0xF:
+                       /* Nano */
+                       data->msr_temp = 0x1423;
+                       break;
+               default:
+                       return -ENODEV;
+               }
        }
 
        /* test if we can access the TEMPERATURE MSR */
@@ -283,6 +287,7 @@ static const struct x86_cpu_id __initconst cputemp_ids[] = {
        { X86_VENDOR_CENTAUR, 6, 0xa, }, /* C7 A */
        { X86_VENDOR_CENTAUR, 6, 0xd, }, /* C7 D */
        { X86_VENDOR_CENTAUR, 6, 0xf, }, /* Nano */
+       { X86_VENDOR_CENTAUR, 7, X86_MODEL_ANY, },
        {}
 };
 MODULE_DEVICE_TABLE(x86cpu, cputemp_ids);
index 83819d0cbf909d0da86975183ef2011e56e02aa3..2a99f0f14795549eacb6d88b87e30cc0ca78272e 100644 (file)
  */
 #define AMD_IOMMU_PGSIZES      ((~0xFFFUL) & ~(2ULL << 38))
 
-static DEFINE_RWLOCK(amd_iommu_devtable_lock);
+static DEFINE_SPINLOCK(amd_iommu_devtable_lock);
+static DEFINE_SPINLOCK(pd_bitmap_lock);
+static DEFINE_SPINLOCK(iommu_table_lock);
 
 /* List of all available dev_data structures */
-static LIST_HEAD(dev_data_list);
-static DEFINE_SPINLOCK(dev_data_list_lock);
+static LLIST_HEAD(dev_data_list);
 
 LIST_HEAD(ioapic_map);
 LIST_HEAD(hpet_map);
@@ -204,40 +205,33 @@ static struct dma_ops_domain* to_dma_ops_domain(struct protection_domain *domain
 static struct iommu_dev_data *alloc_dev_data(u16 devid)
 {
        struct iommu_dev_data *dev_data;
-       unsigned long flags;
 
        dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
        if (!dev_data)
                return NULL;
 
        dev_data->devid = devid;
-
-       spin_lock_irqsave(&dev_data_list_lock, flags);
-       list_add_tail(&dev_data->dev_data_list, &dev_data_list);
-       spin_unlock_irqrestore(&dev_data_list_lock, flags);
-
        ratelimit_default_init(&dev_data->rs);
 
+       llist_add(&dev_data->dev_data_list, &dev_data_list);
        return dev_data;
 }
 
 static struct iommu_dev_data *search_dev_data(u16 devid)
 {
        struct iommu_dev_data *dev_data;
-       unsigned long flags;
+       struct llist_node *node;
 
-       spin_lock_irqsave(&dev_data_list_lock, flags);
-       list_for_each_entry(dev_data, &dev_data_list, dev_data_list) {
+       if (llist_empty(&dev_data_list))
+               return NULL;
+
+       node = dev_data_list.first;
+       llist_for_each_entry(dev_data, node, dev_data_list) {
                if (dev_data->devid == devid)
-                       goto out_unlock;
+                       return dev_data;
        }
 
-       dev_data = NULL;
-
-out_unlock:
-       spin_unlock_irqrestore(&dev_data_list_lock, flags);
-
-       return dev_data;
+       return NULL;
 }
 
 static int __last_alias(struct pci_dev *pdev, u16 alias, void *data)
@@ -311,6 +305,8 @@ static struct iommu_dev_data *find_dev_data(u16 devid)
 
        if (dev_data == NULL) {
                dev_data = alloc_dev_data(devid);
+               if (!dev_data)
+                       return NULL;
 
                if (translation_pre_enabled(iommu))
                        dev_data->defer_attach = true;
@@ -548,6 +544,7 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
 
 static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 {
+       struct device *dev = iommu->iommu.dev;
        int type, devid, domid, flags;
        volatile u32 *event = __evt;
        int count = 0;
@@ -574,53 +571,53 @@ retry:
                amd_iommu_report_page_fault(devid, domid, address, flags);
                return;
        } else {
-               printk(KERN_ERR "AMD-Vi: Event logged [");
+               dev_err(dev, "AMD-Vi: Event logged [");
        }
 
        switch (type) {
        case EVENT_TYPE_ILL_DEV:
-               printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
-                      "address=0x%016llx flags=0x%04x]\n",
-                      PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-                      address, flags);
+               dev_err(dev, "ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
+                       "address=0x%016llx flags=0x%04x]\n",
+                       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+                       address, flags);
                dump_dte_entry(devid);
                break;
        case EVENT_TYPE_DEV_TAB_ERR:
-               printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
-                      "address=0x%016llx flags=0x%04x]\n",
-                      PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-                      address, flags);
+               dev_err(dev, "DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
+                       "address=0x%016llx flags=0x%04x]\n",
+                       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+                       address, flags);
                break;
        case EVENT_TYPE_PAGE_TAB_ERR:
-               printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
-                      "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
-                      PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-                      domid, address, flags);
+               dev_err(dev, "PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
+                       "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+                       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+                       domid, address, flags);
                break;
        case EVENT_TYPE_ILL_CMD:
-               printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
+               dev_err(dev, "ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
                dump_command(address);
                break;
        case EVENT_TYPE_CMD_HARD_ERR:
-               printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
-                      "flags=0x%04x]\n", address, flags);
+               dev_err(dev, "COMMAND_HARDWARE_ERROR address=0x%016llx "
+                       "flags=0x%04x]\n", address, flags);
                break;
        case EVENT_TYPE_IOTLB_INV_TO:
-               printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
-                      "address=0x%016llx]\n",
-                      PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-                      address);
+               dev_err(dev, "IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
+                       "address=0x%016llx]\n",
+                       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+                       address);
                break;
        case EVENT_TYPE_INV_DEV_REQ:
-               printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
-                      "address=0x%016llx flags=0x%04x]\n",
-                      PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-                      address, flags);
+               dev_err(dev, "INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
+                       "address=0x%016llx flags=0x%04x]\n",
+                       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+                       address, flags);
                break;
        default:
-               printk(KERN_ERR "UNKNOWN type=0x%02x event[0]=0x%08x "
-                      "event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n",
-                      type, event[0], event[1], event[2], event[3]);
+               dev_err(dev, "UNKNOWN event[0]=0x%08x event[1]=0x%08x "
+                       "event[2]=0x%08x event[3]=0x%08x\n",
+                       event[0], event[1], event[2], event[3]);
        }
 
        memset(__evt, 0, 4 * sizeof(u32));
@@ -1057,9 +1054,9 @@ static int iommu_queue_command_sync(struct amd_iommu *iommu,
        unsigned long flags;
        int ret;
 
-       spin_lock_irqsave(&iommu->lock, flags);
+       raw_spin_lock_irqsave(&iommu->lock, flags);
        ret = __iommu_queue_command_sync(iommu, cmd, sync);
-       spin_unlock_irqrestore(&iommu->lock, flags);
+       raw_spin_unlock_irqrestore(&iommu->lock, flags);
 
        return ret;
 }
@@ -1085,7 +1082,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 
        build_completion_wait(&cmd, (u64)&iommu->cmd_sem);
 
-       spin_lock_irqsave(&iommu->lock, flags);
+       raw_spin_lock_irqsave(&iommu->lock, flags);
 
        iommu->cmd_sem = 0;
 
@@ -1096,7 +1093,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
        ret = wait_on_sem(&iommu->cmd_sem);
 
 out_unlock:
-       spin_unlock_irqrestore(&iommu->lock, flags);
+       raw_spin_unlock_irqrestore(&iommu->lock, flags);
 
        return ret;
 }
@@ -1606,29 +1603,26 @@ static void del_domain_from_list(struct protection_domain *domain)
 
 static u16 domain_id_alloc(void)
 {
-       unsigned long flags;
        int id;
 
-       write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+       spin_lock(&pd_bitmap_lock);
        id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
        BUG_ON(id == 0);
        if (id > 0 && id < MAX_DOMAIN_ID)
                __set_bit(id, amd_iommu_pd_alloc_bitmap);
        else
                id = 0;
-       write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+       spin_unlock(&pd_bitmap_lock);
 
        return id;
 }
 
 static void domain_id_free(int id)
 {
-       unsigned long flags;
-
-       write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+       spin_lock(&pd_bitmap_lock);
        if (id > 0 && id < MAX_DOMAIN_ID)
                __clear_bit(id, amd_iommu_pd_alloc_bitmap);
-       write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+       spin_unlock(&pd_bitmap_lock);
 }
 
 #define DEFINE_FREE_PT_FN(LVL, FN)                             \
@@ -2104,9 +2098,9 @@ static int attach_device(struct device *dev,
        }
 
 skip_ats_check:
-       write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+       spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
        ret = __attach_device(dev_data, domain);
-       write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+       spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 
        /*
         * We might boot into a crash-kernel here. The crashed kernel
@@ -2156,9 +2150,9 @@ static void detach_device(struct device *dev)
        domain   = dev_data->domain;
 
        /* lock device table */
-       write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+       spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
        __detach_device(dev_data);
-       write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+       spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 
        if (!dev_is_pci(dev))
                return;
@@ -2795,7 +2789,7 @@ static void cleanup_domain(struct protection_domain *domain)
        struct iommu_dev_data *entry;
        unsigned long flags;
 
-       write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+       spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
 
        while (!list_empty(&domain->dev_list)) {
                entry = list_first_entry(&domain->dev_list,
@@ -2803,7 +2797,7 @@ static void cleanup_domain(struct protection_domain *domain)
                __detach_device(entry);
        }
 
-       write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+       spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 }
 
 static void protection_domain_free(struct protection_domain *domain)
@@ -3025,15 +3019,12 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
        size_t unmap_size;
 
        if (domain->mode == PAGE_MODE_NONE)
-               return -EINVAL;
+               return 0;
 
        mutex_lock(&domain->api_lock);
        unmap_size = iommu_unmap_page(domain, iova, page_size);
        mutex_unlock(&domain->api_lock);
 
-       domain_flush_tlb_pde(domain);
-       domain_flush_complete(domain);
-
        return unmap_size;
 }
 
@@ -3151,6 +3142,19 @@ static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
        return dev_data->defer_attach;
 }
 
+static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
+{
+       struct protection_domain *dom = to_pdomain(domain);
+
+       domain_flush_tlb_pde(dom);
+       domain_flush_complete(dom);
+}
+
+static void amd_iommu_iotlb_range_add(struct iommu_domain *domain,
+                                     unsigned long iova, size_t size)
+{
+}
+
 const struct iommu_ops amd_iommu_ops = {
        .capable = amd_iommu_capable,
        .domain_alloc = amd_iommu_domain_alloc,
@@ -3169,6 +3173,9 @@ const struct iommu_ops amd_iommu_ops = {
        .apply_resv_region = amd_iommu_apply_resv_region,
        .is_attach_deferred = amd_iommu_is_attach_deferred,
        .pgsize_bitmap  = AMD_IOMMU_PGSIZES,
+       .flush_iotlb_all = amd_iommu_flush_iotlb_all,
+       .iotlb_range_add = amd_iommu_iotlb_range_add,
+       .iotlb_sync = amd_iommu_flush_iotlb_all,
 };
 
 /*****************************************************************************
@@ -3570,14 +3577,62 @@ static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table)
        amd_iommu_dev_table[devid].data[2] = dte;
 }
 
-static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic)
+static struct irq_remap_table *get_irq_table(u16 devid)
+{
+       struct irq_remap_table *table;
+
+       if (WARN_ONCE(!amd_iommu_rlookup_table[devid],
+                     "%s: no iommu for devid %x\n", __func__, devid))
+               return NULL;
+
+       table = irq_lookup_table[devid];
+       if (WARN_ONCE(!table, "%s: no table for devid %x\n", __func__, devid))
+               return NULL;
+
+       return table;
+}
+
+static struct irq_remap_table *__alloc_irq_table(void)
+{
+       struct irq_remap_table *table;
+
+       table = kzalloc(sizeof(*table), GFP_KERNEL);
+       if (!table)
+               return NULL;
+
+       table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_KERNEL);
+       if (!table->table) {
+               kfree(table);
+               return NULL;
+       }
+       raw_spin_lock_init(&table->lock);
+
+       if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
+               memset(table->table, 0,
+                      MAX_IRQS_PER_TABLE * sizeof(u32));
+       else
+               memset(table->table, 0,
+                      (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
+       return table;
+}
+
+static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid,
+                                 struct irq_remap_table *table)
+{
+       irq_lookup_table[devid] = table;
+       set_dte_irq_entry(devid, table);
+       iommu_flush_dte(iommu, devid);
+}
+
+static struct irq_remap_table *alloc_irq_table(u16 devid)
 {
        struct irq_remap_table *table = NULL;
+       struct irq_remap_table *new_table = NULL;
        struct amd_iommu *iommu;
        unsigned long flags;
        u16 alias;
 
-       write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+       spin_lock_irqsave(&iommu_table_lock, flags);
 
        iommu = amd_iommu_rlookup_table[devid];
        if (!iommu)
@@ -3590,60 +3645,45 @@ static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic)
        alias = amd_iommu_alias_table[devid];
        table = irq_lookup_table[alias];
        if (table) {
-               irq_lookup_table[devid] = table;
-               set_dte_irq_entry(devid, table);
-               iommu_flush_dte(iommu, devid);
-               goto out;
+               set_remap_table_entry(iommu, devid, table);
+               goto out_wait;
        }
+       spin_unlock_irqrestore(&iommu_table_lock, flags);
 
        /* Nothing there yet, allocate new irq remapping table */
-       table = kzalloc(sizeof(*table), GFP_ATOMIC);
-       if (!table)
-               goto out_unlock;
-
-       /* Initialize table spin-lock */
-       spin_lock_init(&table->lock);
+       new_table = __alloc_irq_table();
+       if (!new_table)
+               return NULL;
 
-       if (ioapic)
-               /* Keep the first 32 indexes free for IOAPIC interrupts */
-               table->min_index = 32;
+       spin_lock_irqsave(&iommu_table_lock, flags);
 
-       table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_ATOMIC);
-       if (!table->table) {
-               kfree(table);
-               table = NULL;
+       table = irq_lookup_table[devid];
+       if (table)
                goto out_unlock;
-       }
 
-       if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
-               memset(table->table, 0,
-                      MAX_IRQS_PER_TABLE * sizeof(u32));
-       else
-               memset(table->table, 0,
-                      (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
-
-       if (ioapic) {
-               int i;
-
-               for (i = 0; i < 32; ++i)
-                       iommu->irte_ops->set_allocated(table, i);
+       table = irq_lookup_table[alias];
+       if (table) {
+               set_remap_table_entry(iommu, devid, table);
+               goto out_wait;
        }
 
-       irq_lookup_table[devid] = table;
-       set_dte_irq_entry(devid, table);
-       iommu_flush_dte(iommu, devid);
-       if (devid != alias) {
-               irq_lookup_table[alias] = table;
-               set_dte_irq_entry(alias, table);
-               iommu_flush_dte(iommu, alias);
-       }
+       table = new_table;
+       new_table = NULL;
 
-out:
+       set_remap_table_entry(iommu, devid, table);
+       if (devid != alias)
+               set_remap_table_entry(iommu, alias, table);
+
+out_wait:
        iommu_completion_wait(iommu);
 
 out_unlock:
-       write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+       spin_unlock_irqrestore(&iommu_table_lock, flags);
 
+       if (new_table) {
+               kmem_cache_free(amd_iommu_irq_cache, new_table->table);
+               kfree(new_table);
+       }
        return table;
 }
 
@@ -3657,14 +3697,14 @@ static int alloc_irq_index(u16 devid, int count, bool align)
        if (!iommu)
                return -ENODEV;
 
-       table = get_irq_table(devid, false);
+       table = alloc_irq_table(devid);
        if (!table)
                return -ENODEV;
 
        if (align)
                alignment = roundup_pow_of_two(count);
 
-       spin_lock_irqsave(&table->lock, flags);
+       raw_spin_lock_irqsave(&table->lock, flags);
 
        /* Scan table for free entries */
        for (index = ALIGN(table->min_index, alignment), c = 0;
@@ -3691,7 +3731,7 @@ static int alloc_irq_index(u16 devid, int count, bool align)
        index = -ENOSPC;
 
 out:
-       spin_unlock_irqrestore(&table->lock, flags);
+       raw_spin_unlock_irqrestore(&table->lock, flags);
 
        return index;
 }
@@ -3708,11 +3748,11 @@ static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte,
        if (iommu == NULL)
                return -EINVAL;
 
-       table = get_irq_table(devid, false);
+       table = get_irq_table(devid);
        if (!table)
                return -ENOMEM;
 
-       spin_lock_irqsave(&table->lock, flags);
+       raw_spin_lock_irqsave(&table->lock, flags);
 
        entry = (struct irte_ga *)table->table;
        entry = &entry[index];
@@ -3723,7 +3763,7 @@ static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte,
        if (data)
                data->ref = entry;
 
-       spin_unlock_irqrestore(&table->lock, flags);
+       raw_spin_unlock_irqrestore(&table->lock, flags);
 
        iommu_flush_irt(iommu, devid);
        iommu_completion_wait(iommu);
@@ -3741,13 +3781,13 @@ static int modify_irte(u16 devid, int index, union irte *irte)
        if (iommu == NULL)
                return -EINVAL;
 
-       table = get_irq_table(devid, false);
+       table = get_irq_table(devid);
        if (!table)
                return -ENOMEM;
 
-       spin_lock_irqsave(&table->lock, flags);
+       raw_spin_lock_irqsave(&table->lock, flags);
        table->table[index] = irte->val;
-       spin_unlock_irqrestore(&table->lock, flags);
+       raw_spin_unlock_irqrestore(&table->lock, flags);
 
        iommu_flush_irt(iommu, devid);
        iommu_completion_wait(iommu);
@@ -3765,13 +3805,13 @@ static void free_irte(u16 devid, int index)
        if (iommu == NULL)
                return;
 
-       table = get_irq_table(devid, false);
+       table = get_irq_table(devid);
        if (!table)
                return;
 
-       spin_lock_irqsave(&table->lock, flags);
+       raw_spin_lock_irqsave(&table->lock, flags);
        iommu->irte_ops->clear_allocated(table, index);
-       spin_unlock_irqrestore(&table->lock, flags);
+       raw_spin_unlock_irqrestore(&table->lock, flags);
 
        iommu_flush_irt(iommu, devid);
        iommu_completion_wait(iommu);
@@ -3852,10 +3892,8 @@ static void irte_ga_set_affinity(void *entry, u16 devid, u16 index,
                                 u8 vector, u32 dest_apicid)
 {
        struct irte_ga *irte = (struct irte_ga *) entry;
-       struct iommu_dev_data *dev_data = search_dev_data(devid);
 
-       if (!dev_data || !dev_data->use_vapic ||
-           !irte->lo.fields_remap.guest_mode) {
+       if (!irte->lo.fields_remap.guest_mode) {
                irte->hi.fields.vector = vector;
                irte->lo.fields_remap.destination = dest_apicid;
                modify_irte_ga(devid, index, irte, NULL);
@@ -4061,7 +4099,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
        struct amd_ir_data *data = NULL;
        struct irq_cfg *cfg;
        int i, ret, devid;
-       int index = -1;
+       int index;
 
        if (!info)
                return -EINVAL;
@@ -4085,10 +4123,26 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
                return ret;
 
        if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) {
-               if (get_irq_table(devid, true))
+               struct irq_remap_table *table;
+               struct amd_iommu *iommu;
+
+               table = alloc_irq_table(devid);
+               if (table) {
+                       if (!table->min_index) {
+                               /*
+                                * Keep the first 32 indexes free for IOAPIC
+                                * interrupts.
+                                */
+                               table->min_index = 32;
+                               iommu = amd_iommu_rlookup_table[devid];
+                               for (i = 0; i < 32; ++i)
+                                       iommu->irte_ops->set_allocated(table, i);
+                       }
+                       WARN_ON(table->min_index != 32);
                        index = info->ioapic_pin;
-               else
-                       ret = -ENOMEM;
+               } else {
+                       index = -ENOMEM;
+               }
        } else {
                bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI);
 
@@ -4354,7 +4408,7 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
 {
        unsigned long flags;
        struct amd_iommu *iommu;
-       struct irq_remap_table *irt;
+       struct irq_remap_table *table;
        struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
        int devid = ir_data->irq_2_irte.devid;
        struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
@@ -4368,11 +4422,11 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
        if (!iommu)
                return -ENODEV;
 
-       irt = get_irq_table(devid, false);
-       if (!irt)
+       table = get_irq_table(devid);
+       if (!table)
                return -ENODEV;
 
-       spin_lock_irqsave(&irt->lock, flags);
+       raw_spin_lock_irqsave(&table->lock, flags);
 
        if (ref->lo.fields_vapic.guest_mode) {
                if (cpu >= 0)
@@ -4381,7 +4435,7 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
                barrier();
        }
 
-       spin_unlock_irqrestore(&irt->lock, flags);
+       raw_spin_unlock_irqrestore(&table->lock, flags);
 
        iommu_flush_irt(iommu, devid);
        iommu_completion_wait(iommu);
index 4e4a615bf13f1995903ff46c1da215fc8e745812..904c575d1677ffadfe35c0087edf55849da3f7d2 100644 (file)
@@ -1474,7 +1474,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
 {
        int ret;
 
-       spin_lock_init(&iommu->lock);
+       raw_spin_lock_init(&iommu->lock);
 
        /* Add IOMMU to internal data structures */
        list_add_tail(&iommu->list, &amd_iommu_list);
index 6a877ebd058b1e87a22c9fb2fe3c10035d6123ef..1c9b080276c9d88a5edda5fc28971558d157f92c 100644 (file)
@@ -408,7 +408,7 @@ extern bool amd_iommu_iotlb_sup;
 #define IRQ_TABLE_ALIGNMENT    128
 
 struct irq_remap_table {
-       spinlock_t lock;
+       raw_spinlock_t lock;
        unsigned min_index;
        u32 *table;
 };
@@ -490,7 +490,7 @@ struct amd_iommu {
        int index;
 
        /* locks the accesses to the hardware */
-       spinlock_t lock;
+       raw_spinlock_t lock;
 
        /* Pointer to PCI device of this IOMMU */
        struct pci_dev *dev;
@@ -627,7 +627,7 @@ struct devid_map {
  */
 struct iommu_dev_data {
        struct list_head list;            /* For domain->dev_list */
-       struct list_head dev_data_list;   /* For global dev_data_list */
+       struct llist_node dev_data_list;  /* For global dev_data_list */
        struct protection_domain *domain; /* Domain the device is bound to */
        u16 devid;                        /* PCI Device ID */
        u16 alias;                        /* Alias Device ID */
index 3f2f1fc68b5240cea0b7d82bcbd46e1569d40665..1d647104bccc49fd7af44985b3797bdb69aa1150 100644 (file)
@@ -22,6 +22,8 @@
 
 #include <linux/acpi.h>
 #include <linux/acpi_iort.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
 #include <linux/delay.h>
 #include <linux/dma-iommu.h>
 #include <linux/err.h>
 
 /* MMIO registers */
 #define ARM_SMMU_IDR0                  0x0
-#define IDR0_ST_LVL_SHIFT              27
-#define IDR0_ST_LVL_MASK               0x3
-#define IDR0_ST_LVL_2LVL               (1 << IDR0_ST_LVL_SHIFT)
-#define IDR0_STALL_MODEL_SHIFT         24
-#define IDR0_STALL_MODEL_MASK          0x3
-#define IDR0_STALL_MODEL_STALL         (0 << IDR0_STALL_MODEL_SHIFT)
-#define IDR0_STALL_MODEL_FORCE         (2 << IDR0_STALL_MODEL_SHIFT)
-#define IDR0_TTENDIAN_SHIFT            21
-#define IDR0_TTENDIAN_MASK             0x3
-#define IDR0_TTENDIAN_LE               (2 << IDR0_TTENDIAN_SHIFT)
-#define IDR0_TTENDIAN_BE               (3 << IDR0_TTENDIAN_SHIFT)
-#define IDR0_TTENDIAN_MIXED            (0 << IDR0_TTENDIAN_SHIFT)
+#define IDR0_ST_LVL                    GENMASK(28, 27)
+#define IDR0_ST_LVL_2LVL               1
+#define IDR0_STALL_MODEL               GENMASK(25, 24)
+#define IDR0_STALL_MODEL_STALL         0
+#define IDR0_STALL_MODEL_FORCE         2
+#define IDR0_TTENDIAN                  GENMASK(22, 21)
+#define IDR0_TTENDIAN_MIXED            0
+#define IDR0_TTENDIAN_LE               2
+#define IDR0_TTENDIAN_BE               3
 #define IDR0_CD2L                      (1 << 19)
 #define IDR0_VMID16                    (1 << 18)
 #define IDR0_PRI                       (1 << 16)
 #define IDR0_ATS                       (1 << 10)
 #define IDR0_HYP                       (1 << 9)
 #define IDR0_COHACC                    (1 << 4)
-#define IDR0_TTF_SHIFT                 2
-#define IDR0_TTF_MASK                  0x3
-#define IDR0_TTF_AARCH64               (2 << IDR0_TTF_SHIFT)
-#define IDR0_TTF_AARCH32_64            (3 << IDR0_TTF_SHIFT)
+#define IDR0_TTF                       GENMASK(3, 2)
+#define IDR0_TTF_AARCH64               2
+#define IDR0_TTF_AARCH32_64            3
 #define IDR0_S1P                       (1 << 1)
 #define IDR0_S2P                       (1 << 0)
 
 #define IDR1_TABLES_PRESET             (1 << 30)
 #define IDR1_QUEUES_PRESET             (1 << 29)
 #define IDR1_REL                       (1 << 28)
-#define IDR1_CMDQ_SHIFT                        21
-#define IDR1_CMDQ_MASK                 0x1f
-#define IDR1_EVTQ_SHIFT                        16
-#define IDR1_EVTQ_MASK                 0x1f
-#define IDR1_PRIQ_SHIFT                        11
-#define IDR1_PRIQ_MASK                 0x1f
-#define IDR1_SSID_SHIFT                        6
-#define IDR1_SSID_MASK                 0x1f
-#define IDR1_SID_SHIFT                 0
-#define IDR1_SID_MASK                  0x3f
+#define IDR1_CMDQS                     GENMASK(25, 21)
+#define IDR1_EVTQS                     GENMASK(20, 16)
+#define IDR1_PRIQS                     GENMASK(15, 11)
+#define IDR1_SSIDSIZE                  GENMASK(10, 6)
+#define IDR1_SIDSIZE                   GENMASK(5, 0)
 
 #define ARM_SMMU_IDR5                  0x14
-#define IDR5_STALL_MAX_SHIFT           16
-#define IDR5_STALL_MAX_MASK            0xffff
+#define IDR5_STALL_MAX                 GENMASK(31, 16)
 #define IDR5_GRAN64K                   (1 << 6)
 #define IDR5_GRAN16K                   (1 << 5)
 #define IDR5_GRAN4K                    (1 << 4)
-#define IDR5_OAS_SHIFT                 0
-#define IDR5_OAS_MASK                  0x7
-#define IDR5_OAS_32_BIT                        (0 << IDR5_OAS_SHIFT)
-#define IDR5_OAS_36_BIT                        (1 << IDR5_OAS_SHIFT)
-#define IDR5_OAS_40_BIT                        (2 << IDR5_OAS_SHIFT)
-#define IDR5_OAS_42_BIT                        (3 << IDR5_OAS_SHIFT)
-#define IDR5_OAS_44_BIT                        (4 << IDR5_OAS_SHIFT)
-#define IDR5_OAS_48_BIT                        (5 << IDR5_OAS_SHIFT)
+#define IDR5_OAS                       GENMASK(2, 0)
+#define IDR5_OAS_32_BIT                        0
+#define IDR5_OAS_36_BIT                        1
+#define IDR5_OAS_40_BIT                        2
+#define IDR5_OAS_42_BIT                        3
+#define IDR5_OAS_44_BIT                        4
+#define IDR5_OAS_48_BIT                        5
+#define IDR5_OAS_52_BIT                        6
+#define IDR5_VAX                       GENMASK(11, 10)
+#define IDR5_VAX_52_BIT                        1
 
 #define ARM_SMMU_CR0                   0x20
 #define CR0_CMDQEN                     (1 << 3)
 #define ARM_SMMU_CR0ACK                        0x24
 
 #define ARM_SMMU_CR1                   0x28
-#define CR1_SH_NSH                     0
-#define CR1_SH_OSH                     2
-#define CR1_SH_ISH                     3
+#define CR1_TABLE_SH                   GENMASK(11, 10)
+#define CR1_TABLE_OC                   GENMASK(9, 8)
+#define CR1_TABLE_IC                   GENMASK(7, 6)
+#define CR1_QUEUE_SH                   GENMASK(5, 4)
+#define CR1_QUEUE_OC                   GENMASK(3, 2)
+#define CR1_QUEUE_IC                   GENMASK(1, 0)
+/* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
 #define CR1_CACHE_NC                   0
 #define CR1_CACHE_WB                   1
 #define CR1_CACHE_WT                   2
-#define CR1_TABLE_SH_SHIFT             10
-#define CR1_TABLE_OC_SHIFT             8
-#define CR1_TABLE_IC_SHIFT             6
-#define CR1_QUEUE_SH_SHIFT             4
-#define CR1_QUEUE_OC_SHIFT             2
-#define CR1_QUEUE_IC_SHIFT             0
 
 #define ARM_SMMU_CR2                   0x2c
 #define CR2_PTM                                (1 << 2)
 #define CR2_E2H                                (1 << 0)
 
 #define ARM_SMMU_GBPA                  0x44
-#define GBPA_ABORT                     (1 << 20)
 #define GBPA_UPDATE                    (1 << 31)
+#define GBPA_ABORT                     (1 << 20)
 
 #define ARM_SMMU_IRQ_CTRL              0x50
 #define IRQ_CTRL_EVTQ_IRQEN            (1 << 2)
 
 #define ARM_SMMU_STRTAB_BASE           0x80
 #define STRTAB_BASE_RA                 (1UL << 62)
-#define STRTAB_BASE_ADDR_SHIFT         6
-#define STRTAB_BASE_ADDR_MASK          0x3ffffffffffUL
+#define STRTAB_BASE_ADDR_MASK          GENMASK_ULL(51, 6)
 
 #define ARM_SMMU_STRTAB_BASE_CFG       0x88
-#define STRTAB_BASE_CFG_LOG2SIZE_SHIFT 0
-#define STRTAB_BASE_CFG_LOG2SIZE_MASK  0x3f
-#define STRTAB_BASE_CFG_SPLIT_SHIFT    6
-#define STRTAB_BASE_CFG_SPLIT_MASK     0x1f
-#define STRTAB_BASE_CFG_FMT_SHIFT      16
-#define STRTAB_BASE_CFG_FMT_MASK       0x3
-#define STRTAB_BASE_CFG_FMT_LINEAR     (0 << STRTAB_BASE_CFG_FMT_SHIFT)
-#define STRTAB_BASE_CFG_FMT_2LVL       (1 << STRTAB_BASE_CFG_FMT_SHIFT)
+#define STRTAB_BASE_CFG_FMT            GENMASK(17, 16)
+#define STRTAB_BASE_CFG_FMT_LINEAR     0
+#define STRTAB_BASE_CFG_FMT_2LVL       1
+#define STRTAB_BASE_CFG_SPLIT          GENMASK(10, 6)
+#define STRTAB_BASE_CFG_LOG2SIZE       GENMASK(5, 0)
 
 #define ARM_SMMU_CMDQ_BASE             0x90
 #define ARM_SMMU_CMDQ_PROD             0x98
 #define ARM_SMMU_PRIQ_IRQ_CFG2         0xdc
 
 /* Common MSI config fields */
-#define MSI_CFG0_ADDR_SHIFT            2
-#define MSI_CFG0_ADDR_MASK             0x3fffffffffffUL
-#define MSI_CFG2_SH_SHIFT              4
-#define MSI_CFG2_SH_NSH                        (0UL << MSI_CFG2_SH_SHIFT)
-#define MSI_CFG2_SH_OSH                        (2UL << MSI_CFG2_SH_SHIFT)
-#define MSI_CFG2_SH_ISH                        (3UL << MSI_CFG2_SH_SHIFT)
-#define MSI_CFG2_MEMATTR_SHIFT         0
-#define MSI_CFG2_MEMATTR_DEVICE_nGnRE  (0x1 << MSI_CFG2_MEMATTR_SHIFT)
+#define MSI_CFG0_ADDR_MASK             GENMASK_ULL(51, 2)
+#define MSI_CFG2_SH                    GENMASK(5, 4)
+#define MSI_CFG2_MEMATTR               GENMASK(3, 0)
+
+/* Common memory attribute values */
+#define ARM_SMMU_SH_NSH                        0
+#define ARM_SMMU_SH_OSH                        2
+#define ARM_SMMU_SH_ISH                        3
+#define ARM_SMMU_MEMATTR_DEVICE_nGnRE  0x1
+#define ARM_SMMU_MEMATTR_OIWB          0xf
 
 #define Q_IDX(q, p)                    ((p) & ((1 << (q)->max_n_shift) - 1))
 #define Q_WRP(q, p)                    ((p) & (1 << (q)->max_n_shift))
                                         Q_IDX(q, p) * (q)->ent_dwords)
 
 #define Q_BASE_RWA                     (1UL << 62)
-#define Q_BASE_ADDR_SHIFT              5
-#define Q_BASE_ADDR_MASK               0xfffffffffffUL
-#define Q_BASE_LOG2SIZE_SHIFT          0
-#define Q_BASE_LOG2SIZE_MASK           0x1fUL
+#define Q_BASE_ADDR_MASK               GENMASK_ULL(51, 5)
+#define Q_BASE_LOG2SIZE                        GENMASK(4, 0)
 
 /*
  * Stream table.
 #define STRTAB_SPLIT                   8
 
 #define STRTAB_L1_DESC_DWORDS          1
-#define STRTAB_L1_DESC_SPAN_SHIFT      0
-#define STRTAB_L1_DESC_SPAN_MASK       0x1fUL
-#define STRTAB_L1_DESC_L2PTR_SHIFT     6
-#define STRTAB_L1_DESC_L2PTR_MASK      0x3ffffffffffUL
+#define STRTAB_L1_DESC_SPAN            GENMASK_ULL(4, 0)
+#define STRTAB_L1_DESC_L2PTR_MASK      GENMASK_ULL(51, 6)
 
 #define STRTAB_STE_DWORDS              8
 #define STRTAB_STE_0_V                 (1UL << 0)
-#define STRTAB_STE_0_CFG_SHIFT         1
-#define STRTAB_STE_0_CFG_MASK          0x7UL
-#define STRTAB_STE_0_CFG_ABORT         (0UL << STRTAB_STE_0_CFG_SHIFT)
-#define STRTAB_STE_0_CFG_BYPASS                (4UL << STRTAB_STE_0_CFG_SHIFT)
-#define STRTAB_STE_0_CFG_S1_TRANS      (5UL << STRTAB_STE_0_CFG_SHIFT)
-#define STRTAB_STE_0_CFG_S2_TRANS      (6UL << STRTAB_STE_0_CFG_SHIFT)
-
-#define STRTAB_STE_0_S1FMT_SHIFT       4
-#define STRTAB_STE_0_S1FMT_LINEAR      (0UL << STRTAB_STE_0_S1FMT_SHIFT)
-#define STRTAB_STE_0_S1CTXPTR_SHIFT    6
-#define STRTAB_STE_0_S1CTXPTR_MASK     0x3ffffffffffUL
-#define STRTAB_STE_0_S1CDMAX_SHIFT     59
-#define STRTAB_STE_0_S1CDMAX_MASK      0x1fUL
+#define STRTAB_STE_0_CFG               GENMASK_ULL(3, 1)
+#define STRTAB_STE_0_CFG_ABORT         0
+#define STRTAB_STE_0_CFG_BYPASS                4
+#define STRTAB_STE_0_CFG_S1_TRANS      5
+#define STRTAB_STE_0_CFG_S2_TRANS      6
+
+#define STRTAB_STE_0_S1FMT             GENMASK_ULL(5, 4)
+#define STRTAB_STE_0_S1FMT_LINEAR      0
+#define STRTAB_STE_0_S1CTXPTR_MASK     GENMASK_ULL(51, 6)
+#define STRTAB_STE_0_S1CDMAX           GENMASK_ULL(63, 59)
 
 #define STRTAB_STE_1_S1C_CACHE_NC      0UL
 #define STRTAB_STE_1_S1C_CACHE_WBRA    1UL
 #define STRTAB_STE_1_S1C_CACHE_WT      2UL
 #define STRTAB_STE_1_S1C_CACHE_WB      3UL
-#define STRTAB_STE_1_S1C_SH_NSH                0UL
-#define STRTAB_STE_1_S1C_SH_OSH                2UL
-#define STRTAB_STE_1_S1C_SH_ISH                3UL
-#define STRTAB_STE_1_S1CIR_SHIFT       2
-#define STRTAB_STE_1_S1COR_SHIFT       4
-#define STRTAB_STE_1_S1CSH_SHIFT       6
+#define STRTAB_STE_1_S1CIR             GENMASK_ULL(3, 2)
+#define STRTAB_STE_1_S1COR             GENMASK_ULL(5, 4)
+#define STRTAB_STE_1_S1CSH             GENMASK_ULL(7, 6)
 
 #define STRTAB_STE_1_S1STALLD          (1UL << 27)
 
+#define STRTAB_STE_1_EATS              GENMASK_ULL(29, 28)
 #define STRTAB_STE_1_EATS_ABT          0UL
 #define STRTAB_STE_1_EATS_TRANS                1UL
 #define STRTAB_STE_1_EATS_S1CHK                2UL
-#define STRTAB_STE_1_EATS_SHIFT                28
 
+#define STRTAB_STE_1_STRW              GENMASK_ULL(31, 30)
 #define STRTAB_STE_1_STRW_NSEL1                0UL
 #define STRTAB_STE_1_STRW_EL2          2UL
-#define STRTAB_STE_1_STRW_SHIFT                30
 
+#define STRTAB_STE_1_SHCFG             GENMASK_ULL(45, 44)
 #define STRTAB_STE_1_SHCFG_INCOMING    1UL
-#define STRTAB_STE_1_SHCFG_SHIFT       44
 
-#define STRTAB_STE_2_S2VMID_SHIFT      0
-#define STRTAB_STE_2_S2VMID_MASK       0xffffUL
-#define STRTAB_STE_2_VTCR_SHIFT                32
-#define STRTAB_STE_2_VTCR_MASK         0x7ffffUL
+#define STRTAB_STE_2_S2VMID            GENMASK_ULL(15, 0)
+#define STRTAB_STE_2_VTCR              GENMASK_ULL(50, 32)
 #define STRTAB_STE_2_S2AA64            (1UL << 51)
 #define STRTAB_STE_2_S2ENDI            (1UL << 52)
 #define STRTAB_STE_2_S2PTW             (1UL << 54)
 #define STRTAB_STE_2_S2R               (1UL << 58)
 
-#define STRTAB_STE_3_S2TTB_SHIFT       4
-#define STRTAB_STE_3_S2TTB_MASK                0xfffffffffffUL
+#define STRTAB_STE_3_S2TTB_MASK                GENMASK_ULL(51, 4)
 
 /* Context descriptor (stage-1 only) */
 #define CTXDESC_CD_DWORDS              8
-#define CTXDESC_CD_0_TCR_T0SZ_SHIFT    0
-#define ARM64_TCR_T0SZ_SHIFT           0
-#define ARM64_TCR_T0SZ_MASK            0x1fUL
-#define CTXDESC_CD_0_TCR_TG0_SHIFT     6
-#define ARM64_TCR_TG0_SHIFT            14
-#define ARM64_TCR_TG0_MASK             0x3UL
-#define CTXDESC_CD_0_TCR_IRGN0_SHIFT   8
-#define ARM64_TCR_IRGN0_SHIFT          8
-#define ARM64_TCR_IRGN0_MASK           0x3UL
-#define CTXDESC_CD_0_TCR_ORGN0_SHIFT   10
-#define ARM64_TCR_ORGN0_SHIFT          10
-#define ARM64_TCR_ORGN0_MASK           0x3UL
-#define CTXDESC_CD_0_TCR_SH0_SHIFT     12
-#define ARM64_TCR_SH0_SHIFT            12
-#define ARM64_TCR_SH0_MASK             0x3UL
-#define CTXDESC_CD_0_TCR_EPD0_SHIFT    14
-#define ARM64_TCR_EPD0_SHIFT           7
-#define ARM64_TCR_EPD0_MASK            0x1UL
-#define CTXDESC_CD_0_TCR_EPD1_SHIFT    30
-#define ARM64_TCR_EPD1_SHIFT           23
-#define ARM64_TCR_EPD1_MASK            0x1UL
+#define CTXDESC_CD_0_TCR_T0SZ          GENMASK_ULL(5, 0)
+#define ARM64_TCR_T0SZ                 GENMASK_ULL(5, 0)
+#define CTXDESC_CD_0_TCR_TG0           GENMASK_ULL(7, 6)
+#define ARM64_TCR_TG0                  GENMASK_ULL(15, 14)
+#define CTXDESC_CD_0_TCR_IRGN0         GENMASK_ULL(9, 8)
+#define ARM64_TCR_IRGN0                        GENMASK_ULL(9, 8)
+#define CTXDESC_CD_0_TCR_ORGN0         GENMASK_ULL(11, 10)
+#define ARM64_TCR_ORGN0                        GENMASK_ULL(11, 10)
+#define CTXDESC_CD_0_TCR_SH0           GENMASK_ULL(13, 12)
+#define ARM64_TCR_SH0                  GENMASK_ULL(13, 12)
+#define CTXDESC_CD_0_TCR_EPD0          (1ULL << 14)
+#define ARM64_TCR_EPD0                 (1ULL << 7)
+#define CTXDESC_CD_0_TCR_EPD1          (1ULL << 30)
+#define ARM64_TCR_EPD1                 (1ULL << 23)
 
 #define CTXDESC_CD_0_ENDI              (1UL << 15)
 #define CTXDESC_CD_0_V                 (1UL << 31)
 
-#define CTXDESC_CD_0_TCR_IPS_SHIFT     32
-#define ARM64_TCR_IPS_SHIFT            32
-#define ARM64_TCR_IPS_MASK             0x7UL
-#define CTXDESC_CD_0_TCR_TBI0_SHIFT    38
-#define ARM64_TCR_TBI0_SHIFT           37
-#define ARM64_TCR_TBI0_MASK            0x1UL
+#define CTXDESC_CD_0_TCR_IPS           GENMASK_ULL(34, 32)
+#define ARM64_TCR_IPS                  GENMASK_ULL(34, 32)
+#define CTXDESC_CD_0_TCR_TBI0          (1ULL << 38)
+#define ARM64_TCR_TBI0                 (1ULL << 37)
 
 #define CTXDESC_CD_0_AA64              (1UL << 41)
 #define CTXDESC_CD_0_S                 (1UL << 44)
 #define CTXDESC_CD_0_R                 (1UL << 45)
 #define CTXDESC_CD_0_A                 (1UL << 46)
-#define CTXDESC_CD_0_ASET_SHIFT                47
-#define CTXDESC_CD_0_ASET_SHARED       (0UL << CTXDESC_CD_0_ASET_SHIFT)
-#define CTXDESC_CD_0_ASET_PRIVATE      (1UL << CTXDESC_CD_0_ASET_SHIFT)
-#define CTXDESC_CD_0_ASID_SHIFT                48
-#define CTXDESC_CD_0_ASID_MASK         0xffffUL
-
-#define CTXDESC_CD_1_TTB0_SHIFT                4
-#define CTXDESC_CD_1_TTB0_MASK         0xfffffffffffUL
+#define CTXDESC_CD_0_ASET              (1UL << 47)
+#define CTXDESC_CD_0_ASID              GENMASK_ULL(63, 48)
 
-#define CTXDESC_CD_3_MAIR_SHIFT                0
+#define CTXDESC_CD_1_TTB0_MASK         GENMASK_ULL(51, 4)
 
 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
-#define ARM_SMMU_TCR2CD(tcr, fld)                                      \
-       (((tcr) >> ARM64_TCR_##fld##_SHIFT & ARM64_TCR_##fld##_MASK)    \
-        << CTXDESC_CD_0_TCR_##fld##_SHIFT)
+#define ARM_SMMU_TCR2CD(tcr, fld)      FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \
+                                       FIELD_GET(ARM64_TCR_##fld, tcr))
 
 /* Command queue */
 #define CMDQ_ENT_DWORDS                        2
 #define CMDQ_MAX_SZ_SHIFT              8
 
-#define CMDQ_ERR_SHIFT                 24
-#define CMDQ_ERR_MASK                  0x7f
+#define CMDQ_CONS_ERR                  GENMASK(30, 24)
 #define CMDQ_ERR_CERROR_NONE_IDX       0
 #define CMDQ_ERR_CERROR_ILL_IDX                1
 #define CMDQ_ERR_CERROR_ABT_IDX                2
 
-#define CMDQ_0_OP_SHIFT                        0
-#define CMDQ_0_OP_MASK                 0xffUL
+#define CMDQ_0_OP                      GENMASK_ULL(7, 0)
 #define CMDQ_0_SSV                     (1UL << 11)
 
-#define CMDQ_PREFETCH_0_SID_SHIFT      32
-#define CMDQ_PREFETCH_1_SIZE_SHIFT     0
-#define CMDQ_PREFETCH_1_ADDR_MASK      ~0xfffUL
+#define CMDQ_PREFETCH_0_SID            GENMASK_ULL(63, 32)
+#define CMDQ_PREFETCH_1_SIZE           GENMASK_ULL(4, 0)
+#define CMDQ_PREFETCH_1_ADDR_MASK      GENMASK_ULL(63, 12)
 
-#define CMDQ_CFGI_0_SID_SHIFT          32
-#define CMDQ_CFGI_0_SID_MASK           0xffffffffUL
+#define CMDQ_CFGI_0_SID                        GENMASK_ULL(63, 32)
 #define CMDQ_CFGI_1_LEAF               (1UL << 0)
-#define CMDQ_CFGI_1_RANGE_SHIFT                0
-#define CMDQ_CFGI_1_RANGE_MASK         0x1fUL
+#define CMDQ_CFGI_1_RANGE              GENMASK_ULL(4, 0)
 
-#define CMDQ_TLBI_0_VMID_SHIFT         32
-#define CMDQ_TLBI_0_ASID_SHIFT         48
+#define CMDQ_TLBI_0_VMID               GENMASK_ULL(47, 32)
+#define CMDQ_TLBI_0_ASID               GENMASK_ULL(63, 48)
 #define CMDQ_TLBI_1_LEAF               (1UL << 0)
-#define CMDQ_TLBI_1_VA_MASK            ~0xfffUL
-#define CMDQ_TLBI_1_IPA_MASK           0xfffffffff000UL
-
-#define CMDQ_PRI_0_SSID_SHIFT          12
-#define CMDQ_PRI_0_SSID_MASK           0xfffffUL
-#define CMDQ_PRI_0_SID_SHIFT           32
-#define CMDQ_PRI_0_SID_MASK            0xffffffffUL
-#define CMDQ_PRI_1_GRPID_SHIFT         0
-#define CMDQ_PRI_1_GRPID_MASK          0x1ffUL
-#define CMDQ_PRI_1_RESP_SHIFT          12
-#define CMDQ_PRI_1_RESP_DENY           (0UL << CMDQ_PRI_1_RESP_SHIFT)
-#define CMDQ_PRI_1_RESP_FAIL           (1UL << CMDQ_PRI_1_RESP_SHIFT)
-#define CMDQ_PRI_1_RESP_SUCC           (2UL << CMDQ_PRI_1_RESP_SHIFT)
-
-#define CMDQ_SYNC_0_CS_SHIFT           12
-#define CMDQ_SYNC_0_CS_NONE            (0UL << CMDQ_SYNC_0_CS_SHIFT)
-#define CMDQ_SYNC_0_CS_IRQ             (1UL << CMDQ_SYNC_0_CS_SHIFT)
-#define CMDQ_SYNC_0_CS_SEV             (2UL << CMDQ_SYNC_0_CS_SHIFT)
-#define CMDQ_SYNC_0_MSH_SHIFT          22
-#define CMDQ_SYNC_0_MSH_ISH            (3UL << CMDQ_SYNC_0_MSH_SHIFT)
-#define CMDQ_SYNC_0_MSIATTR_SHIFT      24
-#define CMDQ_SYNC_0_MSIATTR_OIWB       (0xfUL << CMDQ_SYNC_0_MSIATTR_SHIFT)
-#define CMDQ_SYNC_0_MSIDATA_SHIFT      32
-#define CMDQ_SYNC_0_MSIDATA_MASK       0xffffffffUL
-#define CMDQ_SYNC_1_MSIADDR_SHIFT      0
-#define CMDQ_SYNC_1_MSIADDR_MASK       0xffffffffffffcUL
+#define CMDQ_TLBI_1_VA_MASK            GENMASK_ULL(63, 12)
+#define CMDQ_TLBI_1_IPA_MASK           GENMASK_ULL(51, 12)
+
+#define CMDQ_PRI_0_SSID                        GENMASK_ULL(31, 12)
+#define CMDQ_PRI_0_SID                 GENMASK_ULL(63, 32)
+#define CMDQ_PRI_1_GRPID               GENMASK_ULL(8, 0)
+#define CMDQ_PRI_1_RESP                        GENMASK_ULL(13, 12)
+
+#define CMDQ_SYNC_0_CS                 GENMASK_ULL(13, 12)
+#define CMDQ_SYNC_0_CS_NONE            0
+#define CMDQ_SYNC_0_CS_IRQ             1
+#define CMDQ_SYNC_0_CS_SEV             2
+#define CMDQ_SYNC_0_MSH                        GENMASK_ULL(23, 22)
+#define CMDQ_SYNC_0_MSIATTR            GENMASK_ULL(27, 24)
+#define CMDQ_SYNC_0_MSIDATA            GENMASK_ULL(63, 32)
+#define CMDQ_SYNC_1_MSIADDR_MASK       GENMASK_ULL(51, 2)
 
 /* Event queue */
 #define EVTQ_ENT_DWORDS                        4
 #define EVTQ_MAX_SZ_SHIFT              7
 
-#define EVTQ_0_ID_SHIFT                        0
-#define EVTQ_0_ID_MASK                 0xffUL
+#define EVTQ_0_ID                      GENMASK_ULL(7, 0)
 
 /* PRI queue */
 #define PRIQ_ENT_DWORDS                        2
 #define PRIQ_MAX_SZ_SHIFT              8
 
-#define PRIQ_0_SID_SHIFT               0
-#define PRIQ_0_SID_MASK                        0xffffffffUL
-#define PRIQ_0_SSID_SHIFT              32
-#define PRIQ_0_SSID_MASK               0xfffffUL
+#define PRIQ_0_SID                     GENMASK_ULL(31, 0)
+#define PRIQ_0_SSID                    GENMASK_ULL(51, 32)
 #define PRIQ_0_PERM_PRIV               (1UL << 58)
 #define PRIQ_0_PERM_EXEC               (1UL << 59)
 #define PRIQ_0_PERM_READ               (1UL << 60)
 #define PRIQ_0_PRG_LAST                        (1UL << 62)
 #define PRIQ_0_SSID_V                  (1UL << 63)
 
-#define PRIQ_1_PRG_IDX_SHIFT           0
-#define PRIQ_1_PRG_IDX_MASK            0x1ffUL
-#define PRIQ_1_ADDR_SHIFT              12
-#define PRIQ_1_ADDR_MASK               0xfffffffffffffUL
+#define PRIQ_1_PRG_IDX                 GENMASK_ULL(8, 0)
+#define PRIQ_1_ADDR_MASK               GENMASK_ULL(63, 12)
 
 /* High-level queue structures */
 #define ARM_SMMU_POLL_TIMEOUT_US       100
@@ -430,9 +372,9 @@ MODULE_PARM_DESC(disable_bypass,
        "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
 
 enum pri_resp {
-       PRI_RESP_DENY,
-       PRI_RESP_FAIL,
-       PRI_RESP_SUCC,
+       PRI_RESP_DENY = 0,
+       PRI_RESP_FAIL = 1,
+       PRI_RESP_SUCC = 2,
 };
 
 enum arm_smmu_msi_index {
@@ -611,6 +553,7 @@ struct arm_smmu_device {
 #define ARM_SMMU_FEAT_STALLS           (1 << 11)
 #define ARM_SMMU_FEAT_HYP              (1 << 12)
 #define ARM_SMMU_FEAT_STALL_FORCE      (1 << 13)
+#define ARM_SMMU_FEAT_VAX              (1 << 14)
        u32                             features;
 
 #define ARM_SMMU_OPT_SKIP_PREFETCH     (1 << 0)
@@ -836,67 +779,64 @@ static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 {
        memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
-       cmd[0] |= (ent->opcode & CMDQ_0_OP_MASK) << CMDQ_0_OP_SHIFT;
+       cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
 
        switch (ent->opcode) {
        case CMDQ_OP_TLBI_EL2_ALL:
        case CMDQ_OP_TLBI_NSNH_ALL:
                break;
        case CMDQ_OP_PREFETCH_CFG:
-               cmd[0] |= (u64)ent->prefetch.sid << CMDQ_PREFETCH_0_SID_SHIFT;
-               cmd[1] |= ent->prefetch.size << CMDQ_PREFETCH_1_SIZE_SHIFT;
+               cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
+               cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
                cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
                break;
        case CMDQ_OP_CFGI_STE:
-               cmd[0] |= (u64)ent->cfgi.sid << CMDQ_CFGI_0_SID_SHIFT;
-               cmd[1] |= ent->cfgi.leaf ? CMDQ_CFGI_1_LEAF : 0;
+               cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
+               cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
                break;
        case CMDQ_OP_CFGI_ALL:
                /* Cover the entire SID range */
-               cmd[1] |= CMDQ_CFGI_1_RANGE_MASK << CMDQ_CFGI_1_RANGE_SHIFT;
+               cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
                break;
        case CMDQ_OP_TLBI_NH_VA:
-               cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
-               cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
+               cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
+               cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
                cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
                break;
        case CMDQ_OP_TLBI_S2_IPA:
-               cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
-               cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
+               cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
+               cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
                cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
                break;
        case CMDQ_OP_TLBI_NH_ASID:
-               cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
+               cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
                /* Fallthrough */
        case CMDQ_OP_TLBI_S12_VMALL:
-               cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
+               cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
                break;
        case CMDQ_OP_PRI_RESP:
-               cmd[0] |= ent->substream_valid ? CMDQ_0_SSV : 0;
-               cmd[0] |= ent->pri.ssid << CMDQ_PRI_0_SSID_SHIFT;
-               cmd[0] |= (u64)ent->pri.sid << CMDQ_PRI_0_SID_SHIFT;
-               cmd[1] |= ent->pri.grpid << CMDQ_PRI_1_GRPID_SHIFT;
+               cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
+               cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
+               cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
+               cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
                switch (ent->pri.resp) {
                case PRI_RESP_DENY:
-                       cmd[1] |= CMDQ_PRI_1_RESP_DENY;
-                       break;
                case PRI_RESP_FAIL:
-                       cmd[1] |= CMDQ_PRI_1_RESP_FAIL;
-                       break;
                case PRI_RESP_SUCC:
-                       cmd[1] |= CMDQ_PRI_1_RESP_SUCC;
                        break;
                default:
                        return -EINVAL;
                }
+               cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
                break;
        case CMDQ_OP_CMD_SYNC:
                if (ent->sync.msiaddr)
-                       cmd[0] |= CMDQ_SYNC_0_CS_IRQ;
+                       cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
                else
-                       cmd[0] |= CMDQ_SYNC_0_CS_SEV;
-               cmd[0] |= CMDQ_SYNC_0_MSH_ISH | CMDQ_SYNC_0_MSIATTR_OIWB;
-               cmd[0] |= (u64)ent->sync.msidata << CMDQ_SYNC_0_MSIDATA_SHIFT;
+                       cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
+               cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
+               cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
+               cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA, ent->sync.msidata);
                cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
                break;
        default:
@@ -918,7 +858,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
        u64 cmd[CMDQ_ENT_DWORDS];
        struct arm_smmu_queue *q = &smmu->cmdq.q;
        u32 cons = readl_relaxed(q->cons_reg);
-       u32 idx = cons >> CMDQ_ERR_SHIFT & CMDQ_ERR_MASK;
+       u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
        struct arm_smmu_cmdq_ent cmd_sync = {
                .opcode = CMDQ_OP_CMD_SYNC,
        };
@@ -1083,8 +1023,8 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
 #ifdef __BIG_ENDIAN
              CTXDESC_CD_0_ENDI |
 #endif
-             CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE |
-             CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT |
+             CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
+             CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
              CTXDESC_CD_0_V;
 
        /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
@@ -1093,10 +1033,10 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
 
        cfg->cdptr[0] = cpu_to_le64(val);
 
-       val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT;
+       val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
        cfg->cdptr[1] = cpu_to_le64(val);
 
-       cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair << CTXDESC_CD_3_MAIR_SHIFT);
+       cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair);
 }
 
 /* Stream table manipulation functions */
@@ -1105,10 +1045,8 @@ arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
 {
        u64 val = 0;
 
-       val |= (desc->span & STRTAB_L1_DESC_SPAN_MASK)
-               << STRTAB_L1_DESC_SPAN_SHIFT;
-       val |= desc->l2ptr_dma &
-              STRTAB_L1_DESC_L2PTR_MASK << STRTAB_L1_DESC_L2PTR_SHIFT;
+       val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
+       val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
 
        *dst = cpu_to_le64(val);
 }
@@ -1156,10 +1094,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
        };
 
        if (val & STRTAB_STE_0_V) {
-               u64 cfg;
-
-               cfg = val & STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT;
-               switch (cfg) {
+               switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
                case STRTAB_STE_0_CFG_BYPASS:
                        break;
                case STRTAB_STE_0_CFG_S1_TRANS:
@@ -1180,13 +1115,13 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
        /* Bypass/fault */
        if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
                if (!ste->assigned && disable_bypass)
-                       val |= STRTAB_STE_0_CFG_ABORT;
+                       val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
                else
-                       val |= STRTAB_STE_0_CFG_BYPASS;
+                       val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
 
                dst[0] = cpu_to_le64(val);
-               dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING
-                        << STRTAB_STE_1_SHCFG_SHIFT);
+               dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
+                                               STRTAB_STE_1_SHCFG_INCOMING));
                dst[2] = 0; /* Nuke the VMID */
                /*
                 * The SMMU can perform negative caching, so we must sync
@@ -1200,41 +1135,36 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
        if (ste->s1_cfg) {
                BUG_ON(ste_live);
                dst[1] = cpu_to_le64(
-                        STRTAB_STE_1_S1C_CACHE_WBRA
-                        << STRTAB_STE_1_S1CIR_SHIFT |
-                        STRTAB_STE_1_S1C_CACHE_WBRA
-                        << STRTAB_STE_1_S1COR_SHIFT |
-                        STRTAB_STE_1_S1C_SH_ISH << STRTAB_STE_1_S1CSH_SHIFT |
+                        FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
+                        FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
+                        FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
 #ifdef CONFIG_PCI_ATS
-                        STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT |
+                        FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS) |
 #endif
-                        STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
+                        FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
 
                if (smmu->features & ARM_SMMU_FEAT_STALLS &&
                   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
                        dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
 
-               val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
-                       << STRTAB_STE_0_S1CTXPTR_SHIFT) |
-                       STRTAB_STE_0_CFG_S1_TRANS;
+               val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
+                       FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
        }
 
        if (ste->s2_cfg) {
                BUG_ON(ste_live);
                dst[2] = cpu_to_le64(
-                        ste->s2_cfg->vmid << STRTAB_STE_2_S2VMID_SHIFT |
-                        (ste->s2_cfg->vtcr & STRTAB_STE_2_VTCR_MASK)
-                         << STRTAB_STE_2_VTCR_SHIFT |
+                        FIELD_PREP(STRTAB_STE_2_S2VMID, ste->s2_cfg->vmid) |
+                        FIELD_PREP(STRTAB_STE_2_VTCR, ste->s2_cfg->vtcr) |
 #ifdef __BIG_ENDIAN
                         STRTAB_STE_2_S2ENDI |
 #endif
                         STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
                         STRTAB_STE_2_S2R);
 
-               dst[3] = cpu_to_le64(ste->s2_cfg->vttbr &
-                        STRTAB_STE_3_S2TTB_MASK << STRTAB_STE_3_S2TTB_SHIFT);
+               dst[3] = cpu_to_le64(ste->s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
 
-               val |= STRTAB_STE_0_CFG_S2_TRANS;
+               val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
        }
 
        arm_smmu_sync_ste_for_sid(smmu, sid);
@@ -1295,7 +1225,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
 
        do {
                while (!queue_remove_raw(q, evt)) {
-                       u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
+                       u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
 
                        dev_info(smmu->dev, "event 0x%02x received:\n", id);
                        for (i = 0; i < ARRAY_SIZE(evt); ++i)
@@ -1323,11 +1253,11 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
        u16 grpid;
        bool ssv, last;
 
-       sid = evt[0] >> PRIQ_0_SID_SHIFT & PRIQ_0_SID_MASK;
-       ssv = evt[0] & PRIQ_0_SSID_V;
-       ssid = ssv ? evt[0] >> PRIQ_0_SSID_SHIFT & PRIQ_0_SSID_MASK : 0;
-       last = evt[0] & PRIQ_0_PRG_LAST;
-       grpid = evt[1] >> PRIQ_1_PRG_IDX_SHIFT & PRIQ_1_PRG_IDX_MASK;
+       sid = FIELD_GET(PRIQ_0_SID, evt[0]);
+       ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
+       ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
+       last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
+       grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
 
        dev_info(smmu->dev, "unexpected PRI request received:\n");
        dev_info(smmu->dev,
@@ -1337,7 +1267,7 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
                 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
                 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
                 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
-                evt[1] & PRIQ_1_ADDR_MASK << PRIQ_1_ADDR_SHIFT);
+                evt[1] & PRIQ_1_ADDR_MASK);
 
        if (last) {
                struct arm_smmu_cmdq_ent cmd = {
@@ -1664,7 +1594,8 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 
        switch (smmu_domain->stage) {
        case ARM_SMMU_DOMAIN_S1:
-               ias = VA_BITS;
+               ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
+               ias = min_t(unsigned long, ias, VA_BITS);
                oas = smmu->ias;
                fmt = ARM_64_LPAE_S1;
                finalise_stage_fn = arm_smmu_domain_finalise_s1;
@@ -1696,7 +1627,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
                return -ENOMEM;
 
        domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
-       domain->geometry.aperture_end = (1UL << ias) - 1;
+       domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
        domain->geometry.force_aperture = true;
 
        ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
@@ -2102,9 +2033,8 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
        q->ent_dwords   = dwords;
 
        q->q_base  = Q_BASE_RWA;
-       q->q_base |= q->base_dma & Q_BASE_ADDR_MASK << Q_BASE_ADDR_SHIFT;
-       q->q_base |= (q->max_n_shift & Q_BASE_LOG2SIZE_MASK)
-                    << Q_BASE_LOG2SIZE_SHIFT;
+       q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
+       q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->max_n_shift);
 
        q->prod = q->cons = 0;
        return 0;
@@ -2186,11 +2116,9 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
        cfg->strtab = strtab;
 
        /* Configure strtab_base_cfg for 2 levels */
-       reg  = STRTAB_BASE_CFG_FMT_2LVL;
-       reg |= (size & STRTAB_BASE_CFG_LOG2SIZE_MASK)
-               << STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
-       reg |= (STRTAB_SPLIT & STRTAB_BASE_CFG_SPLIT_MASK)
-               << STRTAB_BASE_CFG_SPLIT_SHIFT;
+       reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
+       reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
+       reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
        cfg->strtab_base_cfg = reg;
 
        return arm_smmu_init_l1_strtab(smmu);
@@ -2216,9 +2144,8 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
        cfg->num_l1_ents = 1 << smmu->sid_bits;
 
        /* Configure strtab_base_cfg for a linear table covering all SIDs */
-       reg  = STRTAB_BASE_CFG_FMT_LINEAR;
-       reg |= (smmu->sid_bits & STRTAB_BASE_CFG_LOG2SIZE_MASK)
-               << STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
+       reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
+       reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
        cfg->strtab_base_cfg = reg;
 
        arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
@@ -2239,8 +2166,7 @@ static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
                return ret;
 
        /* Set the strtab base address */
-       reg  = smmu->strtab_cfg.strtab_dma &
-              STRTAB_BASE_ADDR_MASK << STRTAB_BASE_ADDR_SHIFT;
+       reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
        reg |= STRTAB_BASE_RA;
        smmu->strtab_cfg.strtab_base = reg;
 
@@ -2303,11 +2229,11 @@ static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
        phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
 
        doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
-       doorbell &= MSI_CFG0_ADDR_MASK << MSI_CFG0_ADDR_SHIFT;
+       doorbell &= MSI_CFG0_ADDR_MASK;
 
        writeq_relaxed(doorbell, smmu->base + cfg[0]);
        writel_relaxed(msg->data, smmu->base + cfg[1]);
-       writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
+       writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
 }
 
 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
@@ -2328,10 +2254,15 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
        if (!(smmu->features & ARM_SMMU_FEAT_MSI))
                return;
 
+       if (!dev->msi_domain) {
+               dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
+               return;
+       }
+
        /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
        ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
        if (ret) {
-               dev_warn(dev, "failed to allocate MSIs\n");
+               dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
                return;
        }
 
@@ -2370,6 +2301,8 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
                                                "arm-smmu-v3-evtq", smmu);
                if (ret < 0)
                        dev_warn(smmu->dev, "failed to enable evtq irq\n");
+       } else {
+               dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
        }
 
        irq = smmu->gerr_irq;
@@ -2378,6 +2311,8 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
                                       0, "arm-smmu-v3-gerror", smmu);
                if (ret < 0)
                        dev_warn(smmu->dev, "failed to enable gerror irq\n");
+       } else {
+               dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
        }
 
        if (smmu->features & ARM_SMMU_FEAT_PRI) {
@@ -2391,6 +2326,8 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
                        if (ret < 0)
                                dev_warn(smmu->dev,
                                         "failed to enable priq irq\n");
+               } else {
+                       dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
                }
        }
 }
@@ -2463,12 +2400,12 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
                return ret;
 
        /* CR1 (table and queue memory attributes) */
-       reg = (CR1_SH_ISH << CR1_TABLE_SH_SHIFT) |
-             (CR1_CACHE_WB << CR1_TABLE_OC_SHIFT) |
-             (CR1_CACHE_WB << CR1_TABLE_IC_SHIFT) |
-             (CR1_SH_ISH << CR1_QUEUE_SH_SHIFT) |
-             (CR1_CACHE_WB << CR1_QUEUE_OC_SHIFT) |
-             (CR1_CACHE_WB << CR1_QUEUE_IC_SHIFT);
+       reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
+             FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
+             FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
+             FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
+             FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
+             FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
        writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
 
        /* CR2 (random crap) */
@@ -2578,7 +2515,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
        reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
 
        /* 2-level structures */
-       if ((reg & IDR0_ST_LVL_MASK << IDR0_ST_LVL_SHIFT) == IDR0_ST_LVL_2LVL)
+       if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
                smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
 
        if (reg & IDR0_CD2L)
@@ -2589,7 +2526,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
         * We currently require the same endianness as the CPU, but this
         * could be changed later by adding a new IO_PGTABLE_QUIRK.
         */
-       switch (reg & IDR0_TTENDIAN_MASK << IDR0_TTENDIAN_SHIFT) {
+       switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
        case IDR0_TTENDIAN_MIXED:
                smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
                break;
@@ -2631,7 +2568,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
                dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
                         coherent ? "true" : "false");
 
-       switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) {
+       switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
        case IDR0_STALL_MODEL_FORCE:
                smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
                /* Fallthrough */
@@ -2651,7 +2588,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
        }
 
        /* We only support the AArch64 table format at present */
-       switch (reg & IDR0_TTF_MASK << IDR0_TTF_SHIFT) {
+       switch (FIELD_GET(IDR0_TTF, reg)) {
        case IDR0_TTF_AARCH32_64:
                smmu->ias = 40;
                /* Fallthrough */
@@ -2674,22 +2611,22 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
        }
 
        /* Queue sizes, capped at 4k */
-       smmu->cmdq.q.max_n_shift = min((u32)CMDQ_MAX_SZ_SHIFT,
-                                      reg >> IDR1_CMDQ_SHIFT & IDR1_CMDQ_MASK);
+       smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
+                                        FIELD_GET(IDR1_CMDQS, reg));
        if (!smmu->cmdq.q.max_n_shift) {
                /* Odd alignment restrictions on the base, so ignore for now */
                dev_err(smmu->dev, "unit-length command queue not supported\n");
                return -ENXIO;
        }
 
-       smmu->evtq.q.max_n_shift = min((u32)EVTQ_MAX_SZ_SHIFT,
-                                      reg >> IDR1_EVTQ_SHIFT & IDR1_EVTQ_MASK);
-       smmu->priq.q.max_n_shift = min((u32)PRIQ_MAX_SZ_SHIFT,
-                                      reg >> IDR1_PRIQ_SHIFT & IDR1_PRIQ_MASK);
+       smmu->evtq.q.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
+                                        FIELD_GET(IDR1_EVTQS, reg));
+       smmu->priq.q.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
+                                        FIELD_GET(IDR1_PRIQS, reg));
 
        /* SID/SSID sizes */
-       smmu->ssid_bits = reg >> IDR1_SSID_SHIFT & IDR1_SSID_MASK;
-       smmu->sid_bits = reg >> IDR1_SID_SHIFT & IDR1_SID_MASK;
+       smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
+       smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
 
        /*
         * If the SMMU supports fewer bits than would fill a single L2 stream
@@ -2702,8 +2639,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
        reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
 
        /* Maximum number of outstanding stalls */
-       smmu->evtq.max_stalls = reg >> IDR5_STALL_MAX_SHIFT
-                               & IDR5_STALL_MAX_MASK;
+       smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
 
        /* Page sizes */
        if (reg & IDR5_GRAN64K)
@@ -2713,13 +2649,12 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
        if (reg & IDR5_GRAN4K)
                smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
 
-       if (arm_smmu_ops.pgsize_bitmap == -1UL)
-               arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
-       else
-               arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
+       /* Input address size */
+       if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
+               smmu->features |= ARM_SMMU_FEAT_VAX;
 
        /* Output address size */
-       switch (reg & IDR5_OAS_MASK << IDR5_OAS_SHIFT) {
+       switch (FIELD_GET(IDR5_OAS, reg)) {
        case IDR5_OAS_32_BIT:
                smmu->oas = 32;
                break;
@@ -2735,6 +2670,10 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
        case IDR5_OAS_44_BIT:
                smmu->oas = 44;
                break;
+       case IDR5_OAS_52_BIT:
+               smmu->oas = 52;
+               smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
+               break;
        default:
                dev_info(smmu->dev,
                        "unknown output address size. Truncating to 48-bit\n");
@@ -2743,6 +2682,11 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
                smmu->oas = 48;
        }
 
+       if (arm_smmu_ops.pgsize_bitmap == -1UL)
+               arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
+       else
+               arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
+
        /* Set the DMA mask for our table walker */
        if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
                dev_warn(smmu->dev,
index 25914d36c5ace5e1336251c2e1ec55d6ad517b94..f05f3cf9075675a4e466bda1d0621376fb5fea44 100644 (file)
@@ -19,6 +19,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/acpi_iort.h>
 #include <linux/device.h>
 #include <linux/dma-iommu.h>
 #include <linux/gfp.h>
@@ -167,13 +168,18 @@ EXPORT_SYMBOL(iommu_put_dma_cookie);
  *
  * IOMMU drivers can use this to implement their .get_resv_regions callback
  * for general non-IOMMU-specific reservations. Currently, this covers host
- * bridge windows for PCI devices.
+ * bridge windows for PCI devices and GICv3 ITS region reservation on ACPI
+ * based ARM platforms that may require HW MSI reservation.
  */
 void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
 {
        struct pci_host_bridge *bridge;
        struct resource_entry *window;
 
+       if (!is_of_node(dev->iommu_fwspec->iommu_fwnode) &&
+               iort_iommu_msi_get_resv_regions(dev, list) < 0)
+               return;
+
        if (!dev_is_pci(dev))
                return;
 
index 9a7ffd13c7f07d037814b3eb7f1bba75a13641f2..accf58388bdb4892369f0bac43f928667e937035 100644 (file)
@@ -806,7 +806,7 @@ int __init dmar_dev_scope_init(void)
        return dmar_dev_scope_status;
 }
 
-void dmar_register_bus_notifier(void)
+void __init dmar_register_bus_notifier(void)
 {
        bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
 }
index c5f4f7691b571d8ef3ba1677d5353ca9a2b31be7..85879cfec52fada1f4d2f6307e98aa2512a2825d 100644 (file)
@@ -1239,17 +1239,6 @@ static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *iommu_domain,
        return phys;
 }
 
-static struct iommu_group *get_device_iommu_group(struct device *dev)
-{
-       struct iommu_group *group;
-
-       group = iommu_group_get(dev);
-       if (!group)
-               group = iommu_group_alloc();
-
-       return group;
-}
-
 static int exynos_iommu_add_device(struct device *dev)
 {
        struct exynos_iommu_owner *owner = dev->archdata.iommu;
@@ -1345,7 +1334,7 @@ static const struct iommu_ops exynos_iommu_ops = {
        .unmap = exynos_iommu_unmap,
        .map_sg = default_iommu_map_sg,
        .iova_to_phys = exynos_iommu_iova_to_phys,
-       .device_group = get_device_iommu_group,
+       .device_group = generic_device_group,
        .add_device = exynos_iommu_add_device,
        .remove_device = exynos_iommu_remove_device,
        .pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE,
index 24d1b1b420133358614c105b0e8c6e096600887b..749d8f2353466832ae3df9ace44c125f703267c0 100644 (file)
@@ -5043,7 +5043,6 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
 {
        struct dmar_domain *dmar_domain = to_dmar_domain(domain);
        struct page *freelist = NULL;
-       struct intel_iommu *iommu;
        unsigned long start_pfn, last_pfn;
        unsigned int npages;
        int iommu_id, level = 0;
@@ -5062,12 +5061,9 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
 
        npages = last_pfn - start_pfn + 1;
 
-       for_each_domain_iommu(iommu_id, dmar_domain) {
-               iommu = g_iommus[iommu_id];
-
+       for_each_domain_iommu(iommu_id, dmar_domain)
                iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
                                      start_pfn, npages, !freelist, 0);
-       }
 
        dma_free_pagelist(freelist);
 
index 99bc9bd64b9ecc1de640cba1432a750db6e9491f..e8cd984cf9c840f18d0e6b1fd166372126efd2d6 100644 (file)
@@ -396,6 +396,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
                                pasid_max - 1, GFP_KERNEL);
                if (ret < 0) {
                        kfree(svm);
+                       kfree(sdev);
                        goto out;
                }
                svm->pasid = ret;
@@ -422,17 +423,13 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
                iommu->pasid_table[svm->pasid].val = pasid_entry_val;
 
                wmb();
-               /* In caching mode, we still have to flush with PASID 0 when
-                * a PASID table entry becomes present. Not entirely clear
-                * *why* that would be the case â€” surely we could just issue
-                * a flush with the PASID value that we've changed? The PASID
-                * is the index into the table, after all. It's not like domain
-                * IDs in the case of the equivalent context-entry change in
-                * caching mode. And for that matter it's not entirely clear why
-                * a VMM would be in the business of caching the PASID table
-                * anyway. Surely that can be left entirely to the guest? */
+
+               /*
+                * Flush PASID cache when a PASID table entry becomes
+                * present.
+                */
                if (cap_caching_mode(iommu->cap))
-                       intel_flush_pasid_dev(svm, sdev, 0);
+                       intel_flush_pasid_dev(svm, sdev, svm->pasid);
        }
        list_add_rcu(&sdev->list, &svm->devs);
 
index 2ca08dc9331ca1db30c5bd12435c6c36b809d3f5..10e4a3d11c02261b69e97662f41a06d4e72fc34f 100644 (file)
@@ -357,8 +357,8 @@ static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl)
        return false;
 }
 
-static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *, unsigned long,
-                          size_t, int, arm_v7s_iopte *);
+static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *, unsigned long,
+                             size_t, int, arm_v7s_iopte *);
 
 static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
                            unsigned long iova, phys_addr_t paddr, int prot,
@@ -541,9 +541,10 @@ static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
        return pte;
 }
 
-static int arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
-                                  unsigned long iova, size_t size,
-                                  arm_v7s_iopte blk_pte, arm_v7s_iopte *ptep)
+static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
+                                     unsigned long iova, size_t size,
+                                     arm_v7s_iopte blk_pte,
+                                     arm_v7s_iopte *ptep)
 {
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        arm_v7s_iopte pte, *tablep;
@@ -584,9 +585,9 @@ static int arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
        return size;
 }
 
-static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
-                           unsigned long iova, size_t size, int lvl,
-                           arm_v7s_iopte *ptep)
+static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
+                             unsigned long iova, size_t size, int lvl,
+                             arm_v7s_iopte *ptep)
 {
        arm_v7s_iopte pte[ARM_V7S_CONT_PAGES];
        struct io_pgtable *iop = &data->iop;
@@ -656,8 +657,8 @@ static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
        return __arm_v7s_unmap(data, iova, size, lvl + 1, ptep);
 }
 
-static int arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
-                        size_t size)
+static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
+                           size_t size)
 {
        struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
 
index 51e5c43caed18c4c5066b808aa318cd953797afe..39c2a056da210df983b084ead79aa44fa661aadb 100644 (file)
@@ -21,6 +21,7 @@
 #define pr_fmt(fmt)    "arm-lpae io-pgtable: " fmt
 
 #include <linux/atomic.h>
+#include <linux/bitops.h>
 #include <linux/iommu.h>
 #include <linux/kernel.h>
 #include <linux/sizes.h>
@@ -32,7 +33,7 @@
 
 #include "io-pgtable.h"
 
-#define ARM_LPAE_MAX_ADDR_BITS         48
+#define ARM_LPAE_MAX_ADDR_BITS         52
 #define ARM_LPAE_S2_MAX_CONCAT_PAGES   16
 #define ARM_LPAE_MAX_LEVELS            4
 
@@ -86,6 +87,8 @@
 #define ARM_LPAE_PTE_TYPE_TABLE                3
 #define ARM_LPAE_PTE_TYPE_PAGE         3
 
+#define ARM_LPAE_PTE_ADDR_MASK         GENMASK_ULL(47,12)
+
 #define ARM_LPAE_PTE_NSTABLE           (((arm_lpae_iopte)1) << 63)
 #define ARM_LPAE_PTE_XN                        (((arm_lpae_iopte)3) << 53)
 #define ARM_LPAE_PTE_AF                        (((arm_lpae_iopte)1) << 10)
 #define ARM_LPAE_TCR_PS_42_BIT         0x3ULL
 #define ARM_LPAE_TCR_PS_44_BIT         0x4ULL
 #define ARM_LPAE_TCR_PS_48_BIT         0x5ULL
+#define ARM_LPAE_TCR_PS_52_BIT         0x6ULL
 
 #define ARM_LPAE_MAIR_ATTR_SHIFT(n)    ((n) << 3)
 #define ARM_LPAE_MAIR_ATTR_MASK                0xff
 #define ARM_LPAE_MAIR_ATTR_IDX_DEV     2
 
 /* IOPTE accessors */
-#define iopte_deref(pte,d)                                     \
-       (__va((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1)    \
-       & ~(ARM_LPAE_GRANULE(d) - 1ULL)))
+#define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
 
 #define iopte_type(pte,l)                                      \
        (((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK)
                (iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_PAGE) : \
                (iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_BLOCK))
 
-#define iopte_to_pfn(pte,d)                                    \
-       (((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1)) >> (d)->pg_shift)
-
-#define pfn_to_iopte(pfn,d)                                    \
-       (((pfn) << (d)->pg_shift) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1))
-
 struct arm_lpae_io_pgtable {
        struct io_pgtable       iop;
 
@@ -203,6 +199,27 @@ struct arm_lpae_io_pgtable {
 
 typedef u64 arm_lpae_iopte;
 
+static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
+                                    struct arm_lpae_io_pgtable *data)
+{
+       arm_lpae_iopte pte = paddr;
+
+       /* Of the bits which overlap, either 51:48 or 15:12 are always RES0 */
+       return (pte | (pte >> (48 - 12))) & ARM_LPAE_PTE_ADDR_MASK;
+}
+
+static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
+                                 struct arm_lpae_io_pgtable *data)
+{
+       u64 paddr = pte & ARM_LPAE_PTE_ADDR_MASK;
+
+       if (data->pg_shift < 16)
+               return paddr;
+
+       /* Rotate the packed high-order bits back to the top */
+       return (paddr | (paddr << (48 - 12))) & (ARM_LPAE_PTE_ADDR_MASK << 4);
+}
+
 static bool selftest_running = false;
 
 static dma_addr_t __arm_lpae_dma_addr(void *pages)
@@ -268,9 +285,9 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
                __arm_lpae_sync_pte(ptep, cfg);
 }
 
-static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
-                           unsigned long iova, size_t size, int lvl,
-                           arm_lpae_iopte *ptep);
+static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
+                              unsigned long iova, size_t size, int lvl,
+                              arm_lpae_iopte *ptep);
 
 static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
                                phys_addr_t paddr, arm_lpae_iopte prot,
@@ -287,7 +304,7 @@ static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
                pte |= ARM_LPAE_PTE_TYPE_BLOCK;
 
        pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
-       pte |= pfn_to_iopte(paddr >> data->pg_shift, data);
+       pte |= paddr_to_iopte(paddr, data);
 
        __arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
 }
@@ -506,10 +523,10 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop)
        kfree(data);
 }
 
-static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
-                                   unsigned long iova, size_t size,
-                                   arm_lpae_iopte blk_pte, int lvl,
-                                   arm_lpae_iopte *ptep)
+static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
+                                      unsigned long iova, size_t size,
+                                      arm_lpae_iopte blk_pte, int lvl,
+                                      arm_lpae_iopte *ptep)
 {
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        arm_lpae_iopte pte, *tablep;
@@ -528,7 +545,7 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
        if (size == split_sz)
                unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data);
 
-       blk_paddr = iopte_to_pfn(blk_pte, data) << data->pg_shift;
+       blk_paddr = iopte_to_paddr(blk_pte, data);
        pte = iopte_prot(blk_pte);
 
        for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) {
@@ -560,9 +577,9 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
        return size;
 }
 
-static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
-                           unsigned long iova, size_t size, int lvl,
-                           arm_lpae_iopte *ptep)
+static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
+                              unsigned long iova, size_t size, int lvl,
+                              arm_lpae_iopte *ptep)
 {
        arm_lpae_iopte pte;
        struct io_pgtable *iop = &data->iop;
@@ -606,8 +623,8 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
        return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep);
 }
 
-static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
-                         size_t size)
+static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
+                            size_t size)
 {
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
        arm_lpae_iopte *ptep = data->pgd;
@@ -652,12 +669,13 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
 
 found_translation:
        iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
-       return ((phys_addr_t)iopte_to_pfn(pte,data) << data->pg_shift) | iova;
+       return iopte_to_paddr(pte, data) | iova;
 }
 
 static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
 {
-       unsigned long granule;
+       unsigned long granule, page_sizes;
+       unsigned int max_addr_bits = 48;
 
        /*
         * We need to restrict the supported page sizes to match the
@@ -677,17 +695,24 @@ static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
 
        switch (granule) {
        case SZ_4K:
-               cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
+               page_sizes = (SZ_4K | SZ_2M | SZ_1G);
                break;
        case SZ_16K:
-               cfg->pgsize_bitmap &= (SZ_16K | SZ_32M);
+               page_sizes = (SZ_16K | SZ_32M);
                break;
        case SZ_64K:
-               cfg->pgsize_bitmap &= (SZ_64K | SZ_512M);
+               max_addr_bits = 52;
+               page_sizes = (SZ_64K | SZ_512M);
+               if (cfg->oas > 48)
+                       page_sizes |= 1ULL << 42; /* 4TB */
                break;
        default:
-               cfg->pgsize_bitmap = 0;
+               page_sizes = 0;
        }
+
+       cfg->pgsize_bitmap &= page_sizes;
+       cfg->ias = min(cfg->ias, max_addr_bits);
+       cfg->oas = min(cfg->oas, max_addr_bits);
 }
 
 static struct arm_lpae_io_pgtable *
@@ -784,6 +809,9 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
        case 48:
                reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_IPS_SHIFT);
                break;
+       case 52:
+               reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_TCR_IPS_SHIFT);
+               break;
        default:
                goto out_free_data;
        }
@@ -891,6 +919,9 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
        case 48:
                reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_PS_SHIFT);
                break;
+       case 52:
+               reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_TCR_PS_SHIFT);
+               break;
        default:
                goto out_free_data;
        }
index cd2e1eafffe6883f6b43cb38bc23fb1c31bd4895..2df79093cad919a9e51c6a0a918d482db27c92fb 100644 (file)
@@ -119,8 +119,8 @@ struct io_pgtable_cfg {
 struct io_pgtable_ops {
        int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
                   phys_addr_t paddr, size_t size, int prot);
-       int (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
-                    size_t size);
+       size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
+                       size_t size);
        phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
                                    unsigned long iova);
 };
index 69fef991c651de26c60e5c3cb5f2e6be0c0d0fcb..d2aa23202bb91c15337a6d6e95bb36afd8b531e7 100644 (file)
@@ -1573,10 +1573,10 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 
        if (unlikely(ops->unmap == NULL ||
                     domain->pgsize_bitmap == 0UL))
-               return -ENODEV;
+               return 0;
 
        if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
-               return -EINVAL;
+               return 0;
 
        /* find out the minimum page size supported */
        min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
@@ -1589,7 +1589,7 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
        if (!IS_ALIGNED(iova | size, min_pagesz)) {
                pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
                       iova, size, min_pagesz);
-               return -EINVAL;
+               return 0;
        }
 
        pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size);
index f227d73e7bf6e0f28f66752100798c8cc9318184..f2832a10fcea29befea161da78adda8e3ae3472c 100644 (file)
@@ -60,7 +60,7 @@
        (((prot) & 0x3) << F_MMU_TF_PROTECT_SEL_SHIFT(data))
 
 #define REG_MMU_IVRP_PADDR                     0x114
-#define F_MMU_IVRP_PA_SET(pa, ext)             (((pa) >> 1) | ((!!(ext)) << 31))
+
 #define REG_MMU_VLD_PA_RNG                     0x118
 #define F_MMU_VLD_PA_RNG(EA, SA)               (((EA) << 8) | (SA))
 
@@ -539,8 +539,13 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
                F_INT_PRETETCH_TRANSATION_FIFO_FAULT;
        writel_relaxed(regval, data->base + REG_MMU_INT_MAIN_CONTROL);
 
-       writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base, data->enable_4GB),
-                      data->base + REG_MMU_IVRP_PADDR);
+       if (data->m4u_plat == M4U_MT8173)
+               regval = (data->protect_base >> 1) | (data->enable_4GB << 31);
+       else
+               regval = lower_32_bits(data->protect_base) |
+                        upper_32_bits(data->protect_base);
+       writel_relaxed(regval, data->base + REG_MMU_IVRP_PADDR);
+
        if (data->enable_4GB && data->m4u_plat != M4U_MT8173) {
                /*
                 * If 4GB mode is enabled, the validate PA range is from
@@ -695,6 +700,7 @@ static int __maybe_unused mtk_iommu_suspend(struct device *dev)
        reg->ctrl_reg = readl_relaxed(base + REG_MMU_CTRL_REG);
        reg->int_control0 = readl_relaxed(base + REG_MMU_INT_CONTROL0);
        reg->int_main_control = readl_relaxed(base + REG_MMU_INT_MAIN_CONTROL);
+       reg->ivrp_paddr = readl_relaxed(base + REG_MMU_IVRP_PADDR);
        clk_disable_unprepare(data->bclk);
        return 0;
 }
@@ -717,8 +723,7 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev)
        writel_relaxed(reg->ctrl_reg, base + REG_MMU_CTRL_REG);
        writel_relaxed(reg->int_control0, base + REG_MMU_INT_CONTROL0);
        writel_relaxed(reg->int_main_control, base + REG_MMU_INT_MAIN_CONTROL);
-       writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base, data->enable_4GB),
-                      base + REG_MMU_IVRP_PADDR);
+       writel_relaxed(reg->ivrp_paddr, base + REG_MMU_IVRP_PADDR);
        if (data->m4u_dom)
                writel(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0],
                       base + REG_MMU_PT_BASE_ADDR);
index b4451a1c7c2f167060edac5dbe5ce53a99b21144..778498b8633fc63d4383ee0975741a8acffb3b5f 100644 (file)
@@ -32,6 +32,7 @@ struct mtk_iommu_suspend_reg {
        u32                             ctrl_reg;
        u32                             int_control0;
        u32                             int_main_control;
+       u32                             ivrp_paddr;
 };
 
 enum mtk_iommu_plat {
index 5a96fd14ac22876825aa4023d153558120743d61..a7c2a973784f3d4209d9b34de328a66102fee75f 100644 (file)
@@ -417,20 +417,12 @@ static int mtk_iommu_create_mapping(struct device *dev,
                m4udev->archdata.iommu = mtk_mapping;
        }
 
-       ret = arm_iommu_attach_device(dev, mtk_mapping);
-       if (ret)
-               goto err_release_mapping;
-
        return 0;
-
-err_release_mapping:
-       arm_iommu_release_mapping(mtk_mapping);
-       m4udev->archdata.iommu = NULL;
-       return ret;
 }
 
 static int mtk_iommu_add_device(struct device *dev)
 {
+       struct dma_iommu_mapping *mtk_mapping;
        struct of_phandle_args iommu_spec;
        struct of_phandle_iterator it;
        struct mtk_iommu_data *data;
@@ -451,15 +443,30 @@ static int mtk_iommu_add_device(struct device *dev)
        if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops)
                return -ENODEV; /* Not a iommu client device */
 
-       data = dev->iommu_fwspec->iommu_priv;
-       iommu_device_link(&data->iommu, dev);
-
-       group = iommu_group_get_for_dev(dev);
+       /*
+        * This is a short-term bodge because the ARM DMA code doesn't
+        * understand multi-device groups, but we have to call into it
+        * successfully (and not just rely on a normal IOMMU API attach
+        * here) in order to set the correct DMA API ops on @dev.
+        */
+       group = iommu_group_alloc();
        if (IS_ERR(group))
                return PTR_ERR(group);
 
+       err = iommu_group_add_device(group, dev);
        iommu_group_put(group);
-       return 0;
+       if (err)
+               return err;
+
+       data = dev->iommu_fwspec->iommu_priv;
+       mtk_mapping = data->dev->archdata.iommu;
+       err = arm_iommu_attach_device(dev, mtk_mapping);
+       if (err) {
+               iommu_group_remove_device(dev);
+               return err;
+       }
+
+       return iommu_device_link(&data->iommu, dev);
 }
 
 static void mtk_iommu_remove_device(struct device *dev)
@@ -476,24 +483,6 @@ static void mtk_iommu_remove_device(struct device *dev)
        iommu_fwspec_free(dev);
 }
 
-static struct iommu_group *mtk_iommu_device_group(struct device *dev)
-{
-       struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv;
-
-       if (!data)
-               return ERR_PTR(-ENODEV);
-
-       /* All the client devices are in the same m4u iommu-group */
-       if (!data->m4u_group) {
-               data->m4u_group = iommu_group_alloc();
-               if (IS_ERR(data->m4u_group))
-                       dev_err(dev, "Failed to allocate M4U IOMMU group\n");
-       } else {
-               iommu_group_ref_get(data->m4u_group);
-       }
-       return data->m4u_group;
-}
-
 static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
 {
        u32 regval;
@@ -546,7 +535,6 @@ static struct iommu_ops mtk_iommu_ops = {
        .iova_to_phys   = mtk_iommu_iova_to_phys,
        .add_device     = mtk_iommu_add_device,
        .remove_device  = mtk_iommu_remove_device,
-       .device_group   = mtk_iommu_device_group,
        .pgsize_bitmap  = ~0UL << MT2701_IOMMU_PAGE_SHIFT,
 };
 
index e135ab830ebfef6809e33434c76a53ed19c7b7d7..c33b7b104e72a85dc0355a3c2d996cddeed29218 100644 (file)
@@ -1536,7 +1536,7 @@ static struct iommu_group *omap_iommu_device_group(struct device *dev)
        struct iommu_group *group = ERR_PTR(-EINVAL);
 
        if (arch_data->iommu_dev)
-               group = arch_data->iommu_dev->group;
+               group = iommu_group_ref_get(arch_data->iommu_dev->group);
 
        return group;
 }
index 9d991c2d87674c1dc19d3dd1d64f4c287041f4db..5fc8656c60f968b30148eaabbeefa4e63a2f40e2 100644 (file)
@@ -4,6 +4,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/clk.h>
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/iommu.h>
-#include <linux/jiffies.h>
+#include <linux/iopoll.h>
 #include <linux/list.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_iommu.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
 #define RK_MMU_AUTO_GATING     0x24
 
 #define DTE_ADDR_DUMMY         0xCAFEBABE
-#define FORCE_RESET_TIMEOUT    100     /* ms */
+
+#define RK_MMU_POLL_PERIOD_US          100
+#define RK_MMU_FORCE_RESET_TIMEOUT_US  100000
+#define RK_MMU_POLL_TIMEOUT_US         1000
 
 /* RK_MMU_STATUS fields */
 #define RK_MMU_STATUS_PAGING_ENABLED       BIT(0)
   */
 #define RK_IOMMU_PGSIZE_BITMAP 0x007ff000
 
-#define IOMMU_REG_POLL_COUNT_FAST 1000
-
 struct rk_iommu_domain {
        struct list_head iommus;
-       struct platform_device *pdev;
        u32 *dt; /* page directory table */
        dma_addr_t dt_dma;
        spinlock_t iommus_lock; /* lock for iommus list */
@@ -86,24 +89,37 @@ struct rk_iommu_domain {
        struct iommu_domain domain;
 };
 
+/* list of clocks required by IOMMU */
+static const char * const rk_iommu_clocks[] = {
+       "aclk", "iface",
+};
+
 struct rk_iommu {
        struct device *dev;
        void __iomem **bases;
        int num_mmu;
-       int *irq;
-       int num_irq;
+       struct clk_bulk_data *clocks;
+       int num_clocks;
        bool reset_disabled;
        struct iommu_device iommu;
        struct list_head node; /* entry in rk_iommu_domain.iommus */
        struct iommu_domain *domain; /* domain to which iommu is attached */
+       struct iommu_group *group;
+};
+
+struct rk_iommudata {
+       struct device_link *link; /* runtime PM link from IOMMU to master */
+       struct rk_iommu *iommu;
 };
 
+static struct device *dma_dev;
+
 static inline void rk_table_flush(struct rk_iommu_domain *dom, dma_addr_t dma,
                                  unsigned int count)
 {
        size_t size = count * sizeof(u32); /* count of u32 entry */
 
-       dma_sync_single_for_device(&dom->pdev->dev, dma, size, DMA_TO_DEVICE);
+       dma_sync_single_for_device(dma_dev, dma, size, DMA_TO_DEVICE);
 }
 
 static struct rk_iommu_domain *to_rk_domain(struct iommu_domain *dom)
@@ -111,27 +127,6 @@ static struct rk_iommu_domain *to_rk_domain(struct iommu_domain *dom)
        return container_of(dom, struct rk_iommu_domain, domain);
 }
 
-/**
- * Inspired by _wait_for in intel_drv.h
- * This is NOT safe for use in interrupt context.
- *
- * Note that it's important that we check the condition again after having
- * timed out, since the timeout could be due to preemption or similar and
- * we've never had a chance to check the condition before the timeout.
- */
-#define rk_wait_for(COND, MS) ({ \
-       unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1;   \
-       int ret__ = 0;                                                  \
-       while (!(COND)) {                                               \
-               if (time_after(jiffies, timeout__)) {                   \
-                       ret__ = (COND) ? 0 : -ETIMEDOUT;                \
-                       break;                                          \
-               }                                                       \
-               usleep_range(50, 100);                                  \
-       }                                                               \
-       ret__;                                                          \
-})
-
 /*
  * The Rockchip rk3288 iommu uses a 2-level page table.
  * The first level is the "Directory Table" (DT).
@@ -296,19 +291,21 @@ static void rk_iommu_base_command(void __iomem *base, u32 command)
 {
        writel(command, base + RK_MMU_COMMAND);
 }
-static void rk_iommu_zap_lines(struct rk_iommu *iommu, dma_addr_t iova,
+static void rk_iommu_zap_lines(struct rk_iommu *iommu, dma_addr_t iova_start,
                               size_t size)
 {
        int i;
-
-       dma_addr_t iova_end = iova + size;
+       dma_addr_t iova_end = iova_start + size;
        /*
         * TODO(djkurtz): Figure out when it is more efficient to shootdown the
         * entire iotlb rather than iterate over individual iovas.
         */
-       for (i = 0; i < iommu->num_mmu; i++)
-               for (; iova < iova_end; iova += SPAGE_SIZE)
+       for (i = 0; i < iommu->num_mmu; i++) {
+               dma_addr_t iova;
+
+               for (iova = iova_start; iova < iova_end; iova += SPAGE_SIZE)
                        rk_iommu_write(iommu->bases[i], RK_MMU_ZAP_ONE_LINE, iova);
+       }
 }
 
 static bool rk_iommu_is_stall_active(struct rk_iommu *iommu)
@@ -335,9 +332,21 @@ static bool rk_iommu_is_paging_enabled(struct rk_iommu *iommu)
        return enable;
 }
 
+static bool rk_iommu_is_reset_done(struct rk_iommu *iommu)
+{
+       bool done = true;
+       int i;
+
+       for (i = 0; i < iommu->num_mmu; i++)
+               done &= rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR) == 0;
+
+       return done;
+}
+
 static int rk_iommu_enable_stall(struct rk_iommu *iommu)
 {
        int ret, i;
+       bool val;
 
        if (rk_iommu_is_stall_active(iommu))
                return 0;
@@ -348,7 +357,9 @@ static int rk_iommu_enable_stall(struct rk_iommu *iommu)
 
        rk_iommu_command(iommu, RK_MMU_CMD_ENABLE_STALL);
 
-       ret = rk_wait_for(rk_iommu_is_stall_active(iommu), 1);
+       ret = readx_poll_timeout(rk_iommu_is_stall_active, iommu, val,
+                                val, RK_MMU_POLL_PERIOD_US,
+                                RK_MMU_POLL_TIMEOUT_US);
        if (ret)
                for (i = 0; i < iommu->num_mmu; i++)
                        dev_err(iommu->dev, "Enable stall request timed out, status: %#08x\n",
@@ -360,13 +371,16 @@ static int rk_iommu_enable_stall(struct rk_iommu *iommu)
 static int rk_iommu_disable_stall(struct rk_iommu *iommu)
 {
        int ret, i;
+       bool val;
 
        if (!rk_iommu_is_stall_active(iommu))
                return 0;
 
        rk_iommu_command(iommu, RK_MMU_CMD_DISABLE_STALL);
 
-       ret = rk_wait_for(!rk_iommu_is_stall_active(iommu), 1);
+       ret = readx_poll_timeout(rk_iommu_is_stall_active, iommu, val,
+                                !val, RK_MMU_POLL_PERIOD_US,
+                                RK_MMU_POLL_TIMEOUT_US);
        if (ret)
                for (i = 0; i < iommu->num_mmu; i++)
                        dev_err(iommu->dev, "Disable stall request timed out, status: %#08x\n",
@@ -378,13 +392,16 @@ static int rk_iommu_disable_stall(struct rk_iommu *iommu)
 static int rk_iommu_enable_paging(struct rk_iommu *iommu)
 {
        int ret, i;
+       bool val;
 
        if (rk_iommu_is_paging_enabled(iommu))
                return 0;
 
        rk_iommu_command(iommu, RK_MMU_CMD_ENABLE_PAGING);
 
-       ret = rk_wait_for(rk_iommu_is_paging_enabled(iommu), 1);
+       ret = readx_poll_timeout(rk_iommu_is_paging_enabled, iommu, val,
+                                val, RK_MMU_POLL_PERIOD_US,
+                                RK_MMU_POLL_TIMEOUT_US);
        if (ret)
                for (i = 0; i < iommu->num_mmu; i++)
                        dev_err(iommu->dev, "Enable paging request timed out, status: %#08x\n",
@@ -396,13 +413,16 @@ static int rk_iommu_enable_paging(struct rk_iommu *iommu)
 static int rk_iommu_disable_paging(struct rk_iommu *iommu)
 {
        int ret, i;
+       bool val;
 
        if (!rk_iommu_is_paging_enabled(iommu))
                return 0;
 
        rk_iommu_command(iommu, RK_MMU_CMD_DISABLE_PAGING);
 
-       ret = rk_wait_for(!rk_iommu_is_paging_enabled(iommu), 1);
+       ret = readx_poll_timeout(rk_iommu_is_paging_enabled, iommu, val,
+                                !val, RK_MMU_POLL_PERIOD_US,
+                                RK_MMU_POLL_TIMEOUT_US);
        if (ret)
                for (i = 0; i < iommu->num_mmu; i++)
                        dev_err(iommu->dev, "Disable paging request timed out, status: %#08x\n",
@@ -415,6 +435,7 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu)
 {
        int ret, i;
        u32 dte_addr;
+       bool val;
 
        if (iommu->reset_disabled)
                return 0;
@@ -435,13 +456,12 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu)
 
        rk_iommu_command(iommu, RK_MMU_CMD_FORCE_RESET);
 
-       for (i = 0; i < iommu->num_mmu; i++) {
-               ret = rk_wait_for(rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR) == 0x00000000,
-                                 FORCE_RESET_TIMEOUT);
-               if (ret) {
-                       dev_err(iommu->dev, "FORCE_RESET command timed out\n");
-                       return ret;
-               }
+       ret = readx_poll_timeout(rk_iommu_is_reset_done, iommu, val,
+                                val, RK_MMU_FORCE_RESET_TIMEOUT_US,
+                                RK_MMU_POLL_TIMEOUT_US);
+       if (ret) {
+               dev_err(iommu->dev, "FORCE_RESET command timed out\n");
+               return ret;
        }
 
        return 0;
@@ -503,6 +523,12 @@ static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
        irqreturn_t ret = IRQ_NONE;
        int i;
 
+       if (WARN_ON(!pm_runtime_get_if_in_use(iommu->dev)))
+               return 0;
+
+       if (WARN_ON(clk_bulk_enable(iommu->num_clocks, iommu->clocks)))
+               goto out;
+
        for (i = 0; i < iommu->num_mmu; i++) {
                int_status = rk_iommu_read(iommu->bases[i], RK_MMU_INT_STATUS);
                if (int_status == 0)
@@ -549,6 +575,10 @@ static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
                rk_iommu_write(iommu->bases[i], RK_MMU_INT_CLEAR, int_status);
        }
 
+       clk_bulk_disable(iommu->num_clocks, iommu->clocks);
+
+out:
+       pm_runtime_put(iommu->dev);
        return ret;
 }
 
@@ -590,8 +620,17 @@ static void rk_iommu_zap_iova(struct rk_iommu_domain *rk_domain,
        spin_lock_irqsave(&rk_domain->iommus_lock, flags);
        list_for_each(pos, &rk_domain->iommus) {
                struct rk_iommu *iommu;
+
                iommu = list_entry(pos, struct rk_iommu, node);
-               rk_iommu_zap_lines(iommu, iova, size);
+
+               /* Only zap TLBs of IOMMUs that are powered on. */
+               if (pm_runtime_get_if_in_use(iommu->dev)) {
+                       WARN_ON(clk_bulk_enable(iommu->num_clocks,
+                                               iommu->clocks));
+                       rk_iommu_zap_lines(iommu, iova, size);
+                       clk_bulk_disable(iommu->num_clocks, iommu->clocks);
+                       pm_runtime_put(iommu->dev);
+               }
        }
        spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
 }
@@ -608,7 +647,6 @@ static void rk_iommu_zap_iova_first_last(struct rk_iommu_domain *rk_domain,
 static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
                                  dma_addr_t iova)
 {
-       struct device *dev = &rk_domain->pdev->dev;
        u32 *page_table, *dte_addr;
        u32 dte_index, dte;
        phys_addr_t pt_phys;
@@ -626,9 +664,9 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
        if (!page_table)
                return ERR_PTR(-ENOMEM);
 
-       pt_dma = dma_map_single(dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE);
-       if (dma_mapping_error(dev, pt_dma)) {
-               dev_err(dev, "DMA mapping error while allocating page table\n");
+       pt_dma = dma_map_single(dma_dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE);
+       if (dma_mapping_error(dma_dev, pt_dma)) {
+               dev_err(dma_dev, "DMA mapping error while allocating page table\n");
                free_page((unsigned long)page_table);
                return ERR_PTR(-ENOMEM);
        }
@@ -790,52 +828,46 @@ static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
 
 static struct rk_iommu *rk_iommu_from_dev(struct device *dev)
 {
-       struct iommu_group *group;
-       struct device *iommu_dev;
-       struct rk_iommu *rk_iommu;
+       struct rk_iommudata *data = dev->archdata.iommu;
 
-       group = iommu_group_get(dev);
-       if (!group)
-               return NULL;
-       iommu_dev = iommu_group_get_iommudata(group);
-       rk_iommu = dev_get_drvdata(iommu_dev);
-       iommu_group_put(group);
+       return data ? data->iommu : NULL;
+}
+
+/* Must be called with iommu powered on and attached */
+static void rk_iommu_disable(struct rk_iommu *iommu)
+{
+       int i;
 
-       return rk_iommu;
+       /* Ignore error while disabling, just keep going */
+       WARN_ON(clk_bulk_enable(iommu->num_clocks, iommu->clocks));
+       rk_iommu_enable_stall(iommu);
+       rk_iommu_disable_paging(iommu);
+       for (i = 0; i < iommu->num_mmu; i++) {
+               rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, 0);
+               rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, 0);
+       }
+       rk_iommu_disable_stall(iommu);
+       clk_bulk_disable(iommu->num_clocks, iommu->clocks);
 }
 
-static int rk_iommu_attach_device(struct iommu_domain *domain,
-                                 struct device *dev)
+/* Must be called with iommu powered on and attached */
+static int rk_iommu_enable(struct rk_iommu *iommu)
 {
-       struct rk_iommu *iommu;
+       struct iommu_domain *domain = iommu->domain;
        struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
-       unsigned long flags;
        int ret, i;
 
-       /*
-        * Allow 'virtual devices' (e.g., drm) to attach to domain.
-        * Such a device does not belong to an iommu group.
-        */
-       iommu = rk_iommu_from_dev(dev);
-       if (!iommu)
-               return 0;
+       ret = clk_bulk_enable(iommu->num_clocks, iommu->clocks);
+       if (ret)
+               return ret;
 
        ret = rk_iommu_enable_stall(iommu);
        if (ret)
-               return ret;
+               goto out_disable_clocks;
 
        ret = rk_iommu_force_reset(iommu);
        if (ret)
-               return ret;
-
-       iommu->domain = domain;
-
-       for (i = 0; i < iommu->num_irq; i++) {
-               ret = devm_request_irq(iommu->dev, iommu->irq[i], rk_iommu_irq,
-                                      IRQF_SHARED, dev_name(dev), iommu);
-               if (ret)
-                       return ret;
-       }
+               goto out_disable_stall;
 
        for (i = 0; i < iommu->num_mmu; i++) {
                rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR,
@@ -845,18 +877,12 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
        }
 
        ret = rk_iommu_enable_paging(iommu);
-       if (ret)
-               return ret;
-
-       spin_lock_irqsave(&rk_domain->iommus_lock, flags);
-       list_add_tail(&iommu->node, &rk_domain->iommus);
-       spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
-
-       dev_dbg(dev, "Attached to iommu domain\n");
 
+out_disable_stall:
        rk_iommu_disable_stall(iommu);
-
-       return 0;
+out_disable_clocks:
+       clk_bulk_disable(iommu->num_clocks, iommu->clocks);
+       return ret;
 }
 
 static void rk_iommu_detach_device(struct iommu_domain *domain,
@@ -865,60 +891,90 @@ static void rk_iommu_detach_device(struct iommu_domain *domain,
        struct rk_iommu *iommu;
        struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
        unsigned long flags;
-       int i;
 
        /* Allow 'virtual devices' (eg drm) to detach from domain */
        iommu = rk_iommu_from_dev(dev);
        if (!iommu)
                return;
 
+       dev_dbg(dev, "Detaching from iommu domain\n");
+
+       /* iommu already detached */
+       if (iommu->domain != domain)
+               return;
+
+       iommu->domain = NULL;
+
        spin_lock_irqsave(&rk_domain->iommus_lock, flags);
        list_del_init(&iommu->node);
        spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
 
-       /* Ignore error while disabling, just keep going */
-       rk_iommu_enable_stall(iommu);
-       rk_iommu_disable_paging(iommu);
-       for (i = 0; i < iommu->num_mmu; i++) {
-               rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, 0);
-               rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, 0);
+       if (pm_runtime_get_if_in_use(iommu->dev)) {
+               rk_iommu_disable(iommu);
+               pm_runtime_put(iommu->dev);
        }
-       rk_iommu_disable_stall(iommu);
+}
 
-       for (i = 0; i < iommu->num_irq; i++)
-               devm_free_irq(iommu->dev, iommu->irq[i], iommu);
+static int rk_iommu_attach_device(struct iommu_domain *domain,
+               struct device *dev)
+{
+       struct rk_iommu *iommu;
+       struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
+       unsigned long flags;
+       int ret;
 
-       iommu->domain = NULL;
+       /*
+        * Allow 'virtual devices' (e.g., drm) to attach to domain.
+        * Such a device does not belong to an iommu group.
+        */
+       iommu = rk_iommu_from_dev(dev);
+       if (!iommu)
+               return 0;
+
+       dev_dbg(dev, "Attaching to iommu domain\n");
+
+       /* iommu already attached */
+       if (iommu->domain == domain)
+               return 0;
 
-       dev_dbg(dev, "Detached from iommu domain\n");
+       if (iommu->domain)
+               rk_iommu_detach_device(iommu->domain, dev);
+
+       iommu->domain = domain;
+
+       spin_lock_irqsave(&rk_domain->iommus_lock, flags);
+       list_add_tail(&iommu->node, &rk_domain->iommus);
+       spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
+
+       if (!pm_runtime_get_if_in_use(iommu->dev))
+               return 0;
+
+       ret = rk_iommu_enable(iommu);
+       if (ret)
+               rk_iommu_detach_device(iommu->domain, dev);
+
+       pm_runtime_put(iommu->dev);
+
+       return ret;
 }
 
 static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
 {
        struct rk_iommu_domain *rk_domain;
-       struct platform_device *pdev;
-       struct device *iommu_dev;
 
        if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
                return NULL;
 
-       /* Register a pdev per domain, so DMA API can base on this *dev
-        * even some virtual master doesn't have an iommu slave
-        */
-       pdev = platform_device_register_simple("rk_iommu_domain",
-                                              PLATFORM_DEVID_AUTO, NULL, 0);
-       if (IS_ERR(pdev))
+       if (!dma_dev)
                return NULL;
 
-       rk_domain = devm_kzalloc(&pdev->dev, sizeof(*rk_domain), GFP_KERNEL);
+       rk_domain = devm_kzalloc(dma_dev, sizeof(*rk_domain), GFP_KERNEL);
        if (!rk_domain)
-               goto err_unreg_pdev;
-
-       rk_domain->pdev = pdev;
+               return NULL;
 
        if (type == IOMMU_DOMAIN_DMA &&
            iommu_get_dma_cookie(&rk_domain->domain))
-               goto err_unreg_pdev;
+               return NULL;
 
        /*
         * rk32xx iommus use a 2 level pagetable.
@@ -929,11 +985,10 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
        if (!rk_domain->dt)
                goto err_put_cookie;
 
-       iommu_dev = &pdev->dev;
-       rk_domain->dt_dma = dma_map_single(iommu_dev, rk_domain->dt,
+       rk_domain->dt_dma = dma_map_single(dma_dev, rk_domain->dt,
                                           SPAGE_SIZE, DMA_TO_DEVICE);
-       if (dma_mapping_error(iommu_dev, rk_domain->dt_dma)) {
-               dev_err(iommu_dev, "DMA map error for DT\n");
+       if (dma_mapping_error(dma_dev, rk_domain->dt_dma)) {
+               dev_err(dma_dev, "DMA map error for DT\n");
                goto err_free_dt;
        }
 
@@ -954,8 +1009,6 @@ err_free_dt:
 err_put_cookie:
        if (type == IOMMU_DOMAIN_DMA)
                iommu_put_dma_cookie(&rk_domain->domain);
-err_unreg_pdev:
-       platform_device_unregister(pdev);
 
        return NULL;
 }
@@ -972,126 +1025,82 @@ static void rk_iommu_domain_free(struct iommu_domain *domain)
                if (rk_dte_is_pt_valid(dte)) {
                        phys_addr_t pt_phys = rk_dte_pt_address(dte);
                        u32 *page_table = phys_to_virt(pt_phys);
-                       dma_unmap_single(&rk_domain->pdev->dev, pt_phys,
+                       dma_unmap_single(dma_dev, pt_phys,
                                         SPAGE_SIZE, DMA_TO_DEVICE);
                        free_page((unsigned long)page_table);
                }
        }
 
-       dma_unmap_single(&rk_domain->pdev->dev, rk_domain->dt_dma,
+       dma_unmap_single(dma_dev, rk_domain->dt_dma,
                         SPAGE_SIZE, DMA_TO_DEVICE);
        free_page((unsigned long)rk_domain->dt);
 
        if (domain->type == IOMMU_DOMAIN_DMA)
                iommu_put_dma_cookie(&rk_domain->domain);
-
-       platform_device_unregister(rk_domain->pdev);
 }
 
-static bool rk_iommu_is_dev_iommu_master(struct device *dev)
+static int rk_iommu_add_device(struct device *dev)
 {
-       struct device_node *np = dev->of_node;
-       int ret;
-
-       /*
-        * An iommu master has an iommus property containing a list of phandles
-        * to iommu nodes, each with an #iommu-cells property with value 0.
-        */
-       ret = of_count_phandle_with_args(np, "iommus", "#iommu-cells");
-       return (ret > 0);
-}
+       struct iommu_group *group;
+       struct rk_iommu *iommu;
+       struct rk_iommudata *data;
 
-static int rk_iommu_group_set_iommudata(struct iommu_group *group,
-                                       struct device *dev)
-{
-       struct device_node *np = dev->of_node;
-       struct platform_device *pd;
-       int ret;
-       struct of_phandle_args args;
+       data = dev->archdata.iommu;
+       if (!data)
+               return -ENODEV;
 
-       /*
-        * An iommu master has an iommus property containing a list of phandles
-        * to iommu nodes, each with an #iommu-cells property with value 0.
-        */
-       ret = of_parse_phandle_with_args(np, "iommus", "#iommu-cells", 0,
-                                        &args);
-       if (ret) {
-               dev_err(dev, "of_parse_phandle_with_args(%pOF) => %d\n",
-                       np, ret);
-               return ret;
-       }
-       if (args.args_count != 0) {
-               dev_err(dev, "incorrect number of iommu params found for %pOF (found %d, expected 0)\n",
-                       args.np, args.args_count);
-               return -EINVAL;
-       }
+       iommu = rk_iommu_from_dev(dev);
 
-       pd = of_find_device_by_node(args.np);
-       of_node_put(args.np);
-       if (!pd) {
-               dev_err(dev, "iommu %pOF not found\n", args.np);
-               return -EPROBE_DEFER;
-       }
+       group = iommu_group_get_for_dev(dev);
+       if (IS_ERR(group))
+               return PTR_ERR(group);
+       iommu_group_put(group);
 
-       /* TODO(djkurtz): handle multiple slave iommus for a single master */
-       iommu_group_set_iommudata(group, &pd->dev, NULL);
+       iommu_device_link(&iommu->iommu, dev);
+       data->link = device_link_add(dev, iommu->dev, DL_FLAG_PM_RUNTIME);
 
        return 0;
 }
 
-static int rk_iommu_add_device(struct device *dev)
+static void rk_iommu_remove_device(struct device *dev)
 {
-       struct iommu_group *group;
        struct rk_iommu *iommu;
-       int ret;
-
-       if (!rk_iommu_is_dev_iommu_master(dev))
-               return -ENODEV;
+       struct rk_iommudata *data = dev->archdata.iommu;
 
-       group = iommu_group_get(dev);
-       if (!group) {
-               group = iommu_group_alloc();
-               if (IS_ERR(group)) {
-                       dev_err(dev, "Failed to allocate IOMMU group\n");
-                       return PTR_ERR(group);
-               }
-       }
+       iommu = rk_iommu_from_dev(dev);
 
-       ret = iommu_group_add_device(group, dev);
-       if (ret)
-               goto err_put_group;
+       device_link_del(data->link);
+       iommu_device_unlink(&iommu->iommu, dev);
+       iommu_group_remove_device(dev);
+}
 
-       ret = rk_iommu_group_set_iommudata(group, dev);
-       if (ret)
-               goto err_remove_device;
+static struct iommu_group *rk_iommu_device_group(struct device *dev)
+{
+       struct rk_iommu *iommu;
 
        iommu = rk_iommu_from_dev(dev);
-       if (iommu)
-               iommu_device_link(&iommu->iommu, dev);
-
-       iommu_group_put(group);
-
-       return 0;
 
-err_remove_device:
-       iommu_group_remove_device(dev);
-err_put_group:
-       iommu_group_put(group);
-       return ret;
+       return iommu_group_ref_get(iommu->group);
 }
 
-static void rk_iommu_remove_device(struct device *dev)
+static int rk_iommu_of_xlate(struct device *dev,
+                            struct of_phandle_args *args)
 {
-       struct rk_iommu *iommu;
+       struct platform_device *iommu_dev;
+       struct rk_iommudata *data;
 
-       if (!rk_iommu_is_dev_iommu_master(dev))
-               return;
+       data = devm_kzalloc(dma_dev, sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
 
-       iommu = rk_iommu_from_dev(dev);
-       if (iommu)
-               iommu_device_unlink(&iommu->iommu, dev);
+       iommu_dev = of_find_device_by_node(args->np);
 
-       iommu_group_remove_device(dev);
+       data->iommu = platform_get_drvdata(iommu_dev);
+       dev->archdata.iommu = data;
+
+       of_dev_put(iommu_dev);
+
+       return 0;
 }
 
 static const struct iommu_ops rk_iommu_ops = {
@@ -1105,31 +1114,9 @@ static const struct iommu_ops rk_iommu_ops = {
        .add_device = rk_iommu_add_device,
        .remove_device = rk_iommu_remove_device,
        .iova_to_phys = rk_iommu_iova_to_phys,
+       .device_group = rk_iommu_device_group,
        .pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP,
-};
-
-static int rk_iommu_domain_probe(struct platform_device *pdev)
-{
-       struct device *dev = &pdev->dev;
-
-       dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms), GFP_KERNEL);
-       if (!dev->dma_parms)
-               return -ENOMEM;
-
-       /* Set dma_ops for dev, otherwise it would be dummy_dma_ops */
-       arch_setup_dma_ops(dev, 0, DMA_BIT_MASK(32), NULL, false);
-
-       dma_set_max_seg_size(dev, DMA_BIT_MASK(32));
-       dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32));
-
-       return 0;
-}
-
-static struct platform_driver rk_iommu_domain_driver = {
-       .probe = rk_iommu_domain_probe,
-       .driver = {
-                  .name = "rk_iommu_domain",
-       },
+       .of_xlate = rk_iommu_of_xlate,
 };
 
 static int rk_iommu_probe(struct platform_device *pdev)
@@ -1138,7 +1125,7 @@ static int rk_iommu_probe(struct platform_device *pdev)
        struct rk_iommu *iommu;
        struct resource *res;
        int num_res = pdev->num_resources;
-       int err, i;
+       int err, i, irq;
 
        iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
        if (!iommu)
@@ -1165,50 +1152,108 @@ static int rk_iommu_probe(struct platform_device *pdev)
        if (iommu->num_mmu == 0)
                return PTR_ERR(iommu->bases[0]);
 
-       iommu->num_irq = platform_irq_count(pdev);
-       if (iommu->num_irq < 0)
-               return iommu->num_irq;
-       if (iommu->num_irq == 0)
-               return -ENXIO;
-
-       iommu->irq = devm_kcalloc(dev, iommu->num_irq, sizeof(*iommu->irq),
-                                 GFP_KERNEL);
-       if (!iommu->irq)
-               return -ENOMEM;
+       i = 0;
+       while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) {
+               if (irq < 0)
+                       return irq;
 
-       for (i = 0; i < iommu->num_irq; i++) {
-               iommu->irq[i] = platform_get_irq(pdev, i);
-               if (iommu->irq[i] < 0) {
-                       dev_err(dev, "Failed to get IRQ, %d\n", iommu->irq[i]);
-                       return -ENXIO;
-               }
+               err = devm_request_irq(iommu->dev, irq, rk_iommu_irq,
+                                      IRQF_SHARED, dev_name(dev), iommu);
+               if (err)
+                       return err;
        }
 
        iommu->reset_disabled = device_property_read_bool(dev,
                                        "rockchip,disable-mmu-reset");
 
-       err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev));
+       iommu->num_clocks = ARRAY_SIZE(rk_iommu_clocks);
+       iommu->clocks = devm_kcalloc(iommu->dev, iommu->num_clocks,
+                                    sizeof(*iommu->clocks), GFP_KERNEL);
+       if (!iommu->clocks)
+               return -ENOMEM;
+
+       for (i = 0; i < iommu->num_clocks; ++i)
+               iommu->clocks[i].id = rk_iommu_clocks[i];
+
+       err = devm_clk_bulk_get(iommu->dev, iommu->num_clocks, iommu->clocks);
+       if (err)
+               return err;
+
+       err = clk_bulk_prepare(iommu->num_clocks, iommu->clocks);
        if (err)
                return err;
 
+       iommu->group = iommu_group_alloc();
+       if (IS_ERR(iommu->group)) {
+               err = PTR_ERR(iommu->group);
+               goto err_unprepare_clocks;
+       }
+
+       err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev));
+       if (err)
+               goto err_put_group;
+
        iommu_device_set_ops(&iommu->iommu, &rk_iommu_ops);
+       iommu_device_set_fwnode(&iommu->iommu, &dev->of_node->fwnode);
+
        err = iommu_device_register(&iommu->iommu);
+       if (err)
+               goto err_remove_sysfs;
+
+       /*
+        * Use the first registered IOMMU device for domain to use with DMA
+        * API, since a domain might not physically correspond to a single
+        * IOMMU device..
+        */
+       if (!dma_dev)
+               dma_dev = &pdev->dev;
+
+       bus_set_iommu(&platform_bus_type, &rk_iommu_ops);
 
+       pm_runtime_enable(dev);
+
+       return 0;
+err_remove_sysfs:
+       iommu_device_sysfs_remove(&iommu->iommu);
+err_put_group:
+       iommu_group_put(iommu->group);
+err_unprepare_clocks:
+       clk_bulk_unprepare(iommu->num_clocks, iommu->clocks);
        return err;
 }
 
-static int rk_iommu_remove(struct platform_device *pdev)
+static void rk_iommu_shutdown(struct platform_device *pdev)
 {
-       struct rk_iommu *iommu = platform_get_drvdata(pdev);
+       pm_runtime_force_suspend(&pdev->dev);
+}
 
-       if (iommu) {
-               iommu_device_sysfs_remove(&iommu->iommu);
-               iommu_device_unregister(&iommu->iommu);
-       }
+static int __maybe_unused rk_iommu_suspend(struct device *dev)
+{
+       struct rk_iommu *iommu = dev_get_drvdata(dev);
 
+       if (!iommu->domain)
+               return 0;
+
+       rk_iommu_disable(iommu);
        return 0;
 }
 
+static int __maybe_unused rk_iommu_resume(struct device *dev)
+{
+       struct rk_iommu *iommu = dev_get_drvdata(dev);
+
+       if (!iommu->domain)
+               return 0;
+
+       return rk_iommu_enable(iommu);
+}
+
+static const struct dev_pm_ops rk_iommu_pm_ops = {
+       SET_RUNTIME_PM_OPS(rk_iommu_suspend, rk_iommu_resume, NULL)
+       SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+                               pm_runtime_force_resume)
+};
+
 static const struct of_device_id rk_iommu_dt_ids[] = {
        { .compatible = "rockchip,iommu" },
        { /* sentinel */ }
@@ -1217,45 +1262,22 @@ MODULE_DEVICE_TABLE(of, rk_iommu_dt_ids);
 
 static struct platform_driver rk_iommu_driver = {
        .probe = rk_iommu_probe,
-       .remove = rk_iommu_remove,
+       .shutdown = rk_iommu_shutdown,
        .driver = {
                   .name = "rk_iommu",
                   .of_match_table = rk_iommu_dt_ids,
+                  .pm = &rk_iommu_pm_ops,
+                  .suppress_bind_attrs = true,
        },
 };
 
 static int __init rk_iommu_init(void)
 {
-       struct device_node *np;
-       int ret;
-
-       np = of_find_matching_node(NULL, rk_iommu_dt_ids);
-       if (!np)
-               return 0;
-
-       of_node_put(np);
-
-       ret = bus_set_iommu(&platform_bus_type, &rk_iommu_ops);
-       if (ret)
-               return ret;
-
-       ret = platform_driver_register(&rk_iommu_domain_driver);
-       if (ret)
-               return ret;
-
-       ret = platform_driver_register(&rk_iommu_driver);
-       if (ret)
-               platform_driver_unregister(&rk_iommu_domain_driver);
-       return ret;
+       return platform_driver_register(&rk_iommu_driver);
 }
-static void __exit rk_iommu_exit(void)
-{
-       platform_driver_unregister(&rk_iommu_driver);
-       platform_driver_unregister(&rk_iommu_domain_driver);
-}
-
 subsys_initcall(rk_iommu_init);
-module_exit(rk_iommu_exit);
+
+IOMMU_OF_DECLARE(rk_iommu_of, "rockchip,iommu");
 
 MODULE_DESCRIPTION("IOMMU API for Rockchip");
 MODULE_AUTHOR("Simon Xue <xxm@rock-chips.com> and Daniel Kurtz <djkurtz@chromium.org>");
index 2982e93d23698b1211edf7c0917984296135c497..5416f2b2ac21fab0373cb3e88db8d962e15bafd0 100644 (file)
@@ -3612,7 +3612,8 @@ static int __init gic_acpi_parse_madt_its(struct acpi_subtable_header *header,
                return -ENOMEM;
        }
 
-       err = iort_register_domain_token(its_entry->translation_id, dom_handle);
+       err = iort_register_domain_token(its_entry->translation_id, res.start,
+                                        dom_handle);
        if (err) {
                pr_err("ITS@%pa: Unable to register GICv3 ITS domain token (ITS ID %d) to IORT\n",
                       &res.start, its_entry->translation_id);
index 2c8ac3688815e5f0360a47bde82d1a68bf888ce1..edff083f7c4e8b78df49d669d4b4d7a5bbd889fd 100644 (file)
@@ -201,7 +201,7 @@ config BLK_DEV_DM_BUILTIN
 config BLK_DEV_DM
        tristate "Device mapper support"
        select BLK_DEV_DM_BUILTIN
-       select DAX
+       depends on DAX || DAX=n
        ---help---
          Device-mapper is a low level volume manager.  It works by allowing
          people to specify mappings for ranges of logical sectors.  Various
index 99297212eeec887140dcaf8c4582648d8ea98f3b..775c06d953b7a0abadef32ad05c37d168651eacf 100644 (file)
@@ -154,6 +154,7 @@ static int linear_iterate_devices(struct dm_target *ti,
        return fn(ti, lc->dev, lc->start, ti->len, data);
 }
 
+#if IS_ENABLED(CONFIG_DAX_DRIVER)
 static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
                long nr_pages, void **kaddr, pfn_t *pfn)
 {
@@ -184,6 +185,11 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
        return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
+#else
+#define linear_dax_direct_access NULL
+#define linear_dax_copy_from_iter NULL
+#endif
+
 static struct target_type linear_target = {
        .name   = "linear",
        .version = {1, 4, 0},
index 9de072b7782a53d5192087cb36f775d3630440a4..c90c7c08a77fab607181a2c0ccf77e0a51e439ed 100644 (file)
@@ -611,51 +611,6 @@ static int log_mark(struct log_writes_c *lc, char *data)
        return 0;
 }
 
-static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
-                  struct iov_iter *i)
-{
-       struct pending_block *block;
-
-       if (!bytes)
-               return 0;
-
-       block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
-       if (!block) {
-               DMERR("Error allocating dax pending block");
-               return -ENOMEM;
-       }
-
-       block->data = kzalloc(bytes, GFP_KERNEL);
-       if (!block->data) {
-               DMERR("Error allocating dax data space");
-               kfree(block);
-               return -ENOMEM;
-       }
-
-       /* write data provided via the iterator */
-       if (!copy_from_iter(block->data, bytes, i)) {
-               DMERR("Error copying dax data");
-               kfree(block->data);
-               kfree(block);
-               return -EIO;
-       }
-
-       /* rewind the iterator so that the block driver can use it */
-       iov_iter_revert(i, bytes);
-
-       block->datalen = bytes;
-       block->sector = bio_to_dev_sectors(lc, sector);
-       block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;
-
-       atomic_inc(&lc->pending_blocks);
-       spin_lock_irq(&lc->blocks_lock);
-       list_add_tail(&block->list, &lc->unflushed_blocks);
-       spin_unlock_irq(&lc->blocks_lock);
-       wake_up_process(lc->log_kthread);
-
-       return 0;
-}
-
 static void log_writes_dtr(struct dm_target *ti)
 {
        struct log_writes_c *lc = ti->private;
@@ -925,6 +880,52 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit
        limits->io_min = limits->physical_block_size;
 }
 
+#if IS_ENABLED(CONFIG_DAX_DRIVER)
+static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
+                  struct iov_iter *i)
+{
+       struct pending_block *block;
+
+       if (!bytes)
+               return 0;
+
+       block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
+       if (!block) {
+               DMERR("Error allocating dax pending block");
+               return -ENOMEM;
+       }
+
+       block->data = kzalloc(bytes, GFP_KERNEL);
+       if (!block->data) {
+               DMERR("Error allocating dax data space");
+               kfree(block);
+               return -ENOMEM;
+       }
+
+       /* write data provided via the iterator */
+       if (!copy_from_iter(block->data, bytes, i)) {
+               DMERR("Error copying dax data");
+               kfree(block->data);
+               kfree(block);
+               return -EIO;
+       }
+
+       /* rewind the iterator so that the block driver can use it */
+       iov_iter_revert(i, bytes);
+
+       block->datalen = bytes;
+       block->sector = bio_to_dev_sectors(lc, sector);
+       block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;
+
+       atomic_inc(&lc->pending_blocks);
+       spin_lock_irq(&lc->blocks_lock);
+       list_add_tail(&block->list, &lc->unflushed_blocks);
+       spin_unlock_irq(&lc->blocks_lock);
+       wake_up_process(lc->log_kthread);
+
+       return 0;
+}
+
 static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
                                         long nr_pages, void **kaddr, pfn_t *pfn)
 {
@@ -961,6 +962,10 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti,
 dax_copy:
        return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
 }
+#else
+#define log_writes_dax_direct_access NULL
+#define log_writes_dax_copy_from_iter NULL
+#endif
 
 static struct target_type log_writes_target = {
        .name   = "log-writes",
index bb907cb3e60dfc90203585d7e5abfd4ff0d93a66..fe7fb9b1aec34602ac677c07a9c0ae4134191813 100644 (file)
@@ -313,6 +313,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
        return DM_MAPIO_REMAPPED;
 }
 
+#if IS_ENABLED(CONFIG_DAX_DRIVER)
 static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
                long nr_pages, void **kaddr, pfn_t *pfn)
 {
@@ -353,6 +354,11 @@ static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
        return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
+#else
+#define stripe_dax_direct_access NULL
+#define stripe_dax_copy_from_iter NULL
+#endif
+
 /*
  * Stripe status:
  *
index 5a81c47be4e4ea7c94ff8f5bc6d0c1923a7b782b..4ea404dbcf0b936b3cc3c42dc76158813ecb88ec 100644 (file)
@@ -1826,7 +1826,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
 static struct mapped_device *alloc_dev(int minor)
 {
        int r, numa_node_id = dm_get_numa_node();
-       struct dax_device *dax_dev;
+       struct dax_device *dax_dev = NULL;
        struct mapped_device *md;
        void *old_md;
 
@@ -1892,9 +1892,11 @@ static struct mapped_device *alloc_dev(int minor)
        md->disk->private_data = md;
        sprintf(md->disk->disk_name, "dm-%d", minor);
 
-       dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
-       if (!dax_dev)
-               goto bad;
+       if (IS_ENABLED(CONFIG_DAX_DRIVER)) {
+               dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
+               if (!dax_dev)
+                       goto bad;
+       }
        md->dax_dev = dax_dev;
 
        add_disk_no_queue_reg(md->disk);
index fafe1ebc8affbab051150f52833b86bad93a944c..2a5df99735fa27cc2a260da565357ce5cb45d80f 100644 (file)
@@ -668,7 +668,7 @@ static void cec_pin_rx_states(struct cec_pin *pin, ktime_t ts)
                /* Start bit low is too short, go back to idle */
                if (delta < CEC_TIM_START_BIT_LOW_MIN - CEC_TIM_IDLE_SAMPLE) {
                        if (!pin->rx_start_bit_low_too_short_cnt++) {
-                               pin->rx_start_bit_low_too_short_ts = pin->ts;
+                               pin->rx_start_bit_low_too_short_ts = ktime_to_ns(pin->ts);
                                pin->rx_start_bit_low_too_short_delta = delta;
                        }
                        cec_pin_to_idle(pin);
@@ -700,7 +700,7 @@ static void cec_pin_rx_states(struct cec_pin *pin, ktime_t ts)
                /* Start bit is too short, go back to idle */
                if (delta < CEC_TIM_START_BIT_TOTAL_MIN - CEC_TIM_IDLE_SAMPLE) {
                        if (!pin->rx_start_bit_too_short_cnt++) {
-                               pin->rx_start_bit_too_short_ts = pin->ts;
+                               pin->rx_start_bit_too_short_ts = ktime_to_ns(pin->ts);
                                pin->rx_start_bit_too_short_delta = delta;
                        }
                        cec_pin_to_idle(pin);
@@ -770,7 +770,7 @@ static void cec_pin_rx_states(struct cec_pin *pin, ktime_t ts)
                 */
                if (delta < CEC_TIM_DATA_BIT_TOTAL_MIN) {
                        if (!pin->rx_data_bit_too_short_cnt++) {
-                               pin->rx_data_bit_too_short_ts = pin->ts;
+                               pin->rx_data_bit_too_short_ts = ktime_to_ns(pin->ts);
                                pin->rx_data_bit_too_short_delta = delta;
                        }
                        cec_pin_low(pin);
index 37632bc524d406fe63acb9682b285410f555f892..9b64f4f354bf8e837d502038338024003154f81e 100644 (file)
@@ -1149,7 +1149,7 @@ static void gen_twopix(struct tpg_data *tpg,
        case V4L2_PIX_FMT_NV42:
                buf[0][offset] = r_y_h;
                buf[1][2 * offset] = b_v;
-               buf[1][(2 * offset + 1) %8] = g_u_s;
+               buf[1][(2 * offset + 1) % 8] = g_u_s;
                break;
 
        case V4L2_PIX_FMT_YUYV:
index 21a7d4b47e1a48bb65498df0dfb5b3b175f66302..e33414975065619714edf564775e97a3bc146534 100644 (file)
@@ -2089,7 +2089,7 @@ static int dvb_frontend_handle_compat_ioctl(struct file *file, unsigned int cmd,
                }
                for (i = 0; i < tvps->num; i++) {
                        err = dtv_property_process_get(
-                           fe, &getp, (struct dtv_property *)tvp + i, file);
+                           fe, &getp, (struct dtv_property *)(tvp + i), file);
                        if (err < 0) {
                                kfree(tvp);
                                return err;
index 5188178588c9067d60e7a38ac08d8bd8bba08858..61514bae7e5ceb423b60e8a3a62c9a7df59a7fb4 100644 (file)
@@ -275,7 +275,8 @@ static int adv748x_afe_s_stream(struct v4l2_subdev *sd, int enable)
 {
        struct adv748x_afe *afe = adv748x_sd_to_afe(sd);
        struct adv748x_state *state = adv748x_afe_to_state(afe);
-       int ret, signal = V4L2_IN_ST_NO_SIGNAL;
+       u32 signal = V4L2_IN_ST_NO_SIGNAL;
+       int ret;
 
        mutex_lock(&state->mutex);
 
index 8dbbf0f917dffbd8188be766a0d66dd447099b1e..91fae01d052bf5568cd646d6bf521062182a43ca 100644 (file)
@@ -1,15 +1,5 @@
-/*
- * Copyright (c) 2015--2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2015--2017 Intel Corporation.
 
 #include <linux/delay.h>
 #include <linux/i2c.h>
index 664e8acdf2a012a2aaf05529cb54484edaf8d4a8..daec33f4196aa9286056ea84f3f977059c6661f7 100644 (file)
@@ -1426,7 +1426,7 @@ static int imx274_set_vflip(struct stimx274 *priv, int val)
 
        err = imx274_write_reg(priv, IMX274_VFLIP_REG, val);
        if (err) {
-               dev_err(&priv->client->dev, "VFILP control error\n");
+               dev_err(&priv->client->dev, "VFLIP control error\n");
                return err;
        }
 
index 30ee9f71bf0dae1d01f48c852707d8f0c5978a06..3dbcae257164b7d241f9bb74a691fe563eac3e95 100644 (file)
@@ -1,16 +1,5 @@
-/*
- * Copyright (c) 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Intel Corporation.
 
 #include <linux/acpi.h>
 #include <linux/i2c.h>
@@ -1375,7 +1364,9 @@ ov13858_set_pad_format(struct v4l2_subdev *sd,
        if (fmt->format.code != MEDIA_BUS_FMT_SGRBG10_1X10)
                fmt->format.code = MEDIA_BUS_FMT_SGRBG10_1X10;
 
-       mode = v4l2_find_nearest_size(supported_modes, width, height,
+       mode = v4l2_find_nearest_size(supported_modes,
+                                     ARRAY_SIZE(supported_modes),
+                                     width, height,
                                      fmt->format.width, fmt->format.height);
        ov13858_update_pad_format(mode, fmt);
        if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
index 83c55e8288e7de5903d4ef3b5703333cd47bcb57..385c1886a9470a1ad4153b61493501803be5ee5c 100644 (file)
@@ -832,7 +832,6 @@ MODULE_DEVICE_TABLE(of, ov2685_of_match);
 static struct i2c_driver ov2685_i2c_driver = {
        .driver = {
                .name = "ov2685",
-               .owner = THIS_MODULE,
                .pm = &ov2685_pm_ops,
                .of_match_table = of_match_ptr(ov2685_of_match),
        },
index 03940f0cdfa62585bb8b2dd1946c29e7fcb8b403..852026baa2e70adb933ee02a9971dd81f0f910a9 100644 (file)
@@ -1641,6 +1641,9 @@ static int ov5640_set_mode(struct ov5640_dev *sensor,
        return 0;
 }
 
+static int ov5640_set_framefmt(struct ov5640_dev *sensor,
+                              struct v4l2_mbus_framefmt *format);
+
 /* restore the last set video mode after chip power-on */
 static int ov5640_restore_mode(struct ov5640_dev *sensor)
 {
@@ -1652,7 +1655,11 @@ static int ov5640_restore_mode(struct ov5640_dev *sensor)
                return ret;
 
        /* now restore the last capture mode */
-       return ov5640_set_mode(sensor, &ov5640_mode_init_data);
+       ret = ov5640_set_mode(sensor, &ov5640_mode_init_data);
+       if (ret < 0)
+               return ret;
+
+       return ov5640_set_framefmt(sensor, &sensor->fmt);
 }
 
 static void ov5640_power(struct ov5640_dev *sensor, bool enable)
@@ -1874,7 +1881,13 @@ static int ov5640_try_fmt_internal(struct v4l2_subdev *sd,
                if (ov5640_formats[i].code == fmt->code)
                        break;
        if (i >= ARRAY_SIZE(ov5640_formats))
-               fmt->code = ov5640_formats[0].code;
+               i = 0;
+
+       fmt->code = ov5640_formats[i].code;
+       fmt->colorspace = ov5640_formats[i].colorspace;
+       fmt->ycbcr_enc = V4L2_MAP_YCBCR_ENC_DEFAULT(fmt->colorspace);
+       fmt->quantization = V4L2_QUANTIZATION_FULL_RANGE;
+       fmt->xfer_func = V4L2_MAP_XFER_FUNC_DEFAULT(fmt->colorspace);
 
        return 0;
 }
@@ -1885,6 +1898,7 @@ static int ov5640_set_fmt(struct v4l2_subdev *sd,
 {
        struct ov5640_dev *sensor = to_ov5640_dev(sd);
        const struct ov5640_mode_info *new_mode;
+       struct v4l2_mbus_framefmt *mbus_fmt = &format->format;
        int ret;
 
        if (format->pad != 0)
@@ -1897,7 +1911,7 @@ static int ov5640_set_fmt(struct v4l2_subdev *sd,
                goto out;
        }
 
-       ret = ov5640_try_fmt_internal(sd, &format->format,
+       ret = ov5640_try_fmt_internal(sd, mbus_fmt,
                                      sensor->current_fr, &new_mode);
        if (ret)
                goto out;
@@ -1906,12 +1920,12 @@ static int ov5640_set_fmt(struct v4l2_subdev *sd,
                struct v4l2_mbus_framefmt *fmt =
                        v4l2_subdev_get_try_format(sd, cfg, 0);
 
-               *fmt = format->format;
+               *fmt = *mbus_fmt;
                goto out;
        }
 
        sensor->current_mode = new_mode;
-       sensor->fmt = format->format;
+       sensor->fmt = *mbus_fmt;
        sensor->pending_mode_change = true;
 out:
        mutex_unlock(&sensor->lock);
@@ -2496,6 +2510,7 @@ static int ov5640_probe(struct i2c_client *client,
        struct device *dev = &client->dev;
        struct fwnode_handle *endpoint;
        struct ov5640_dev *sensor;
+       struct v4l2_mbus_framefmt *fmt;
        int ret;
 
        sensor = devm_kzalloc(dev, sizeof(*sensor), GFP_KERNEL);
@@ -2503,10 +2518,15 @@ static int ov5640_probe(struct i2c_client *client,
                return -ENOMEM;
 
        sensor->i2c_client = client;
-       sensor->fmt.code = MEDIA_BUS_FMT_UYVY8_2X8;
-       sensor->fmt.width = 640;
-       sensor->fmt.height = 480;
-       sensor->fmt.field = V4L2_FIELD_NONE;
+       fmt = &sensor->fmt;
+       fmt->code = ov5640_formats[0].code;
+       fmt->colorspace = ov5640_formats[0].colorspace;
+       fmt->ycbcr_enc = V4L2_MAP_YCBCR_ENC_DEFAULT(fmt->colorspace);
+       fmt->quantization = V4L2_QUANTIZATION_FULL_RANGE;
+       fmt->xfer_func = V4L2_MAP_XFER_FUNC_DEFAULT(fmt->colorspace);
+       fmt->width = 640;
+       fmt->height = 480;
+       fmt->field = V4L2_FIELD_NONE;
        sensor->frame_interval.numerator = 1;
        sensor->frame_interval.denominator = ov5640_framerates[OV5640_30_FPS];
        sensor->current_fr = OV5640_30_FPS;
index d28845f7356f0541684c31234bc2fbf30477f6bf..4e3142a7e5a7a76daeeb605a391a85f893bc39c3 100644 (file)
@@ -959,23 +959,6 @@ __ov5645_get_pad_crop(struct ov5645 *ov5645, struct v4l2_subdev_pad_config *cfg,
        }
 }
 
-static const struct ov5645_mode_info *
-ov5645_find_nearest_mode(unsigned int width, unsigned int height)
-{
-       int i;
-
-       for (i = ARRAY_SIZE(ov5645_mode_info_data) - 1; i >= 0; i--) {
-               if (ov5645_mode_info_data[i].width <= width &&
-                   ov5645_mode_info_data[i].height <= height)
-                       break;
-       }
-
-       if (i < 0)
-               i = 0;
-
-       return &ov5645_mode_info_data[i];
-}
-
 static int ov5645_set_format(struct v4l2_subdev *sd,
                             struct v4l2_subdev_pad_config *cfg,
                             struct v4l2_subdev_format *format)
@@ -989,8 +972,11 @@ static int ov5645_set_format(struct v4l2_subdev *sd,
        __crop = __ov5645_get_pad_crop(ov5645, cfg, format->pad,
                        format->which);
 
-       new_mode = ov5645_find_nearest_mode(format->format.width,
-                                           format->format.height);
+       new_mode = v4l2_find_nearest_size(ov5645_mode_info_data,
+                              ARRAY_SIZE(ov5645_mode_info_data),
+                              width, height,
+                              format->format.width, format->format.height);
+
        __crop->width = new_mode->width;
        __crop->height = new_mode->height;
 
@@ -1131,13 +1117,14 @@ static int ov5645_probe(struct i2c_client *client,
 
        ret = v4l2_fwnode_endpoint_parse(of_fwnode_handle(endpoint),
                                         &ov5645->ep);
+
+       of_node_put(endpoint);
+
        if (ret < 0) {
                dev_err(dev, "parsing endpoint node failed\n");
                return ret;
        }
 
-       of_node_put(endpoint);
-
        if (ov5645->ep.bus_type != V4L2_MBUS_CSI2) {
                dev_err(dev, "invalid bus type, must be CSI2\n");
                return -EINVAL;
index d2db480da1b9db07d0c4c91a9256345bfb8663e9..7b7c74d7737075c3e2dd5402342afd08edf40cc7 100644 (file)
@@ -1,16 +1,5 @@
-/*
- * Copyright (c) 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Intel Corporation.
 
 #include <linux/acpi.h>
 #include <linux/i2c.h>
@@ -2230,7 +2219,9 @@ static int ov5670_set_pad_format(struct v4l2_subdev *sd,
 
        fmt->format.code = MEDIA_BUS_FMT_SGRBG10_1X10;
 
-       mode = v4l2_find_nearest_size(supported_modes, width, height,
+       mode = v4l2_find_nearest_size(supported_modes,
+                                     ARRAY_SIZE(supported_modes),
+                                     width, height,
                                      fmt->format.width, fmt->format.height);
        ov5670_update_pad_format(mode, fmt);
        if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
index 226f90886484ce4972ed39518be6f1d18775027b..af17aaa21f5840d9443771977f204acf089679e2 100644 (file)
@@ -1081,11 +1081,11 @@ static int mtk_jpeg_clk_init(struct mtk_jpeg_dev *jpeg)
 
        jpeg->clk_jdec = devm_clk_get(jpeg->dev, "jpgdec");
        if (IS_ERR(jpeg->clk_jdec))
-               return -EINVAL;
+               return PTR_ERR(jpeg->clk_jdec);
 
        jpeg->clk_jdec_smi = devm_clk_get(jpeg->dev, "jpgdec-smi");
        if (IS_ERR(jpeg->clk_jdec_smi))
-               return -EINVAL;
+               return PTR_ERR(jpeg->clk_jdec_smi);
 
        return 0;
 }
index 521d4b36c0904d1e6f25e8db688216108ad61c40..c4a577848dd7a889ae474840721e5d557f5c5026 100644 (file)
@@ -76,7 +76,7 @@ int venus_boot(struct device *dev, const char *fwname)
        }
 
        ret = qcom_mdt_load(dev, mdt, fwname, VENUS_PAS_ID, mem_va, mem_phys,
-                           mem_size);
+                           mem_size, NULL);
 
        release_firmware(mdt);
 
index c9e9576bb08a4e1f568aec1ea55ad003e66dded6..49bbd1861d3a3f5d289825ff4e76921d7eaf0ce1 100644 (file)
@@ -135,20 +135,21 @@ find_format_by_index(struct venus_inst *inst, unsigned int index, u32 type)
                return NULL;
 
        for (i = 0; i < size; i++) {
+               bool valid;
+
                if (fmt[i].type != type)
                        continue;
-               if (k == index)
+               valid = type != V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE ||
+                       venus_helper_check_codec(inst, fmt[i].pixfmt);
+               if (k == index && valid)
                        break;
-               k++;
+               if (valid)
+                       k++;
        }
 
        if (i == size)
                return NULL;
 
-       if (type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE &&
-           !venus_helper_check_codec(inst, fmt[i].pixfmt))
-               return NULL;
-
        return &fmt[i];
 }
 
index e3a10a852cade9cb8eca6b1768a73ce4cab8d0b0..6b2ce479584e9123af085aed83a2b5aa052050c8 100644 (file)
@@ -120,20 +120,21 @@ find_format_by_index(struct venus_inst *inst, unsigned int index, u32 type)
                return NULL;
 
        for (i = 0; i < size; i++) {
+               bool valid;
+
                if (fmt[i].type != type)
                        continue;
-               if (k == index)
+               valid = type != V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE ||
+                       venus_helper_check_codec(inst, fmt[i].pixfmt);
+               if (k == index && valid)
                        break;
-               k++;
+               if (valid)
+                       k++;
        }
 
        if (i == size)
                return NULL;
 
-       if (type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE &&
-           !venus_helper_check_codec(inst, fmt[i].pixfmt))
-               return NULL;
-
        return &fmt[i];
 }
 
index 01c7036836573a587447c6df61cc955b36d824bf..1599159f2574e5c1f9dbc9b075bdc05134bb2d5a 100644 (file)
@@ -561,8 +561,9 @@ int vivid_try_fmt_vid_cap(struct file *file, void *priv,
        mp->field = vivid_field_cap(dev, mp->field);
        if (vivid_is_webcam(dev)) {
                const struct v4l2_frmsize_discrete *sz =
-                       v4l2_find_nearest_size(webcam_sizes, width, height,
-                                              mp->width, mp->height);
+                       v4l2_find_nearest_size(webcam_sizes,
+                                              VIVID_WEBCAM_SIZES, width,
+                                              height, mp->width, mp->height);
 
                w = sz->width;
                h = sz->height;
index f7f3b4b2c2dea03f683bfa42b2f92a250de1f3f7..8bd6b2f1af1546e96bc067a7b410dac74fef2f3e 100644 (file)
@@ -452,7 +452,7 @@ static void wpf_configure(struct vsp1_entity *entity,
                        : VI6_WPF_SRCRPF_RPF_ACT_SUB(input->entity.index);
        }
 
-       if (pipe->bru || pipe->num_inputs > 1)
+       if (pipe->bru)
                srcrpf |= pipe->bru->type == VSP1_ENTITY_BRU
                        ? VI6_WPF_SRCRPF_VIRACT_MST
                        : VI6_WPF_SRCRPF_VIRACT2_MST;
index bc9299059f4830dc646726193393a32ba95d356d..3e14b9e2e763df20882c363ded7dfdfe19253b15 100644 (file)
@@ -20,6 +20,8 @@
 //
 //     RF Gain set/get is not implemented.
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/videodev2.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
@@ -2371,7 +2373,7 @@ err:
 err_no_gate:
        mutex_unlock(&r820t_list_mutex);
 
-       tuner_info("%s: failed=%d\n", __func__, rc);
+       pr_info("%s: failed=%d\n", __func__, rc);
        r820t_release(fe);
        return NULL;
 }
index 713029420fcf54eb85260596de18a484e9298549..67ed66712d058c6676001bcb3226c31d6fb9cfc3 100644 (file)
@@ -276,7 +276,7 @@ static int start_streaming(struct cx231xx_dvb *dvb)
 
        if (dev->USE_ISO) {
                dev_dbg(dev->dev, "DVB transfer mode is ISO.\n");
-               cx231xx_set_alt_setting(dev, INDEX_TS1, 4);
+               cx231xx_set_alt_setting(dev, INDEX_TS1, 5);
                rc = cx231xx_set_mode(dev, CX231XX_DIGITAL_MODE);
                if (rc < 0)
                        return rc;
index d214a21acff770fe6f024fcf685912b228ef67e9..bc9a439745aae162fc46ed5af5012738a4eefc93 100644 (file)
@@ -7,7 +7,7 @@ menuconfig USB_GSPCA
          Say Y here if you want to enable selecting webcams based
          on the GSPCA framework.
 
-         See <file:Documentation/video4linux/gspca.txt> for more info.
+         See <file:Documentation/media/v4l-drivers/gspca-cardlist.rst> for more info.
 
          This driver uses the Video For Linux API. You must say Y or M to
          "Video For Linux" to use this driver.
index 5198c9eeb3480387b021eaccf9cb941a2166fa37..4312935f1dfcec4ae13230dbaaa3ec5aa227f891 100644 (file)
@@ -101,7 +101,7 @@ static int get_v4l2_window32(struct v4l2_window __user *kp,
 static int put_v4l2_window32(struct v4l2_window __user *kp,
                             struct v4l2_window32 __user *up)
 {
-       struct v4l2_clip __user *kclips = kp->clips;
+       struct v4l2_clip __user *kclips;
        struct v4l2_clip32 __user *uclips;
        compat_caddr_t p;
        u32 clipcount;
@@ -116,6 +116,8 @@ static int put_v4l2_window32(struct v4l2_window __user *kp,
        if (!clipcount)
                return 0;
 
+       if (get_user(kclips, &kp->clips))
+               return -EFAULT;
        if (get_user(p, &up->clips))
                return -EFAULT;
        uclips = compat_ptr(p);
index 0301fe426a4358cb136e6980ef3577329219cd97..1d0b2208e8fb67b85fcf170d4d24697858c13d2d 100644 (file)
@@ -939,10 +939,14 @@ int __video_register_device(struct video_device *vdev,
 #endif
        vdev->minor = i + minor_offset;
        vdev->num = nr;
-       devnode_set(vdev);
 
        /* Should not happen since we thought this minor was free */
-       WARN_ON(video_device[vdev->minor] != NULL);
+       if (WARN_ON(video_device[vdev->minor])) {
+               mutex_unlock(&videodev_lock);
+               printk(KERN_ERR "video_device not empty!\n");
+               return -ENFILE;
+       }
+       devnode_set(vdev);
        vdev->index = get_index(vdev);
        video_device[vdev->minor] = vdev;
        mutex_unlock(&videodev_lock);
index 24108bfad88982c0688de8d4bcef17def3750587..6193270e7b3dcabf9c4f2befac666678b5cdb00d 100644 (file)
@@ -400,10 +400,14 @@ static void skip_back_repeat_test(char *arg)
        int go_back = simple_strtol(arg, NULL, 10);
 
        repeat_test--;
-       if (repeat_test <= 0)
+       if (repeat_test <= 0) {
                ts.idx++;
-       else
+       } else {
+               if (repeat_test % 100 == 0)
+                       v1printk("kgdbts:RUN ... %d remaining\n", repeat_test);
+
                ts.idx -= go_back;
+       }
        fill_get_buf(ts.tst[ts.idx].get);
 }
 
index 02485e310c81f364368a24728664493cb315ed9b..9e923cd1d80ebee9c96d70aceabb87b2a0935581 100644 (file)
@@ -3080,6 +3080,7 @@ static void __exit mmc_blk_exit(void)
        mmc_unregister_driver(&mmc_driver);
        unregister_blkdev(MMC_BLOCK_MAJOR, "mmc");
        unregister_chrdev_region(mmc_rpmb_devt, MAX_DEVICES);
+       bus_unregister(&mmc_rpmb_bus_type);
 }
 
 module_init(mmc_blk_init);
index 712e08d9a45e452100d42a76fb5eed916f42bc93..a0168e9e4fce74cdb8ed6fb2e30477e809999f35 100644 (file)
@@ -362,9 +362,9 @@ static void jz4740_mmc_set_irq_enabled(struct jz4740_mmc_host *host,
                host->irq_mask &= ~irq;
        else
                host->irq_mask |= irq;
-       spin_unlock_irqrestore(&host->lock, flags);
 
        writew(host->irq_mask, host->base + JZ_REG_MMC_IMASK);
+       spin_unlock_irqrestore(&host->lock, flags);
 }
 
 static void jz4740_mmc_clock_enable(struct jz4740_mmc_host *host,
index e30df9ad8197c4b59b1ed0b40c8037d2ef2b7322..3080299303045adc5908683b5509e4420ead2063 100644 (file)
@@ -913,7 +913,7 @@ static void tmio_mmc_finish_request(struct tmio_mmc_host *host)
                host->check_scc_error(host);
 
        /* If SET_BLOCK_COUNT, continue with main command */
-       if (host->mrq) {
+       if (host->mrq && !mrq->cmd->error) {
                tmio_process_mrq(host, mrq);
                return;
        }
index b1fc28f63882e75125230d05462b775cb18732f7..d0b63bbf46a792e3d168606b6dd102b7bb0c9079 100644 (file)
@@ -244,7 +244,7 @@ static int ubiblock_open(struct block_device *bdev, fmode_t mode)
         * in any case.
         */
        if (mode & FMODE_WRITE) {
-               ret = -EPERM;
+               ret = -EROFS;
                goto out_unlock;
        }
 
index e941395de3aedc0d4fc6d641e214ec9848c72e27..753494e042d51cdbfb22b93fc16c58fa43002ec0 100644 (file)
@@ -854,6 +854,17 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num,
                return -EINVAL;
        }
 
+       /*
+        * Both UBI and UBIFS have been designed for SLC NAND and NOR flashes.
+        * MLC NAND is different and needs special care, otherwise UBI or UBIFS
+        * will die soon and you will lose all your data.
+        */
+       if (mtd->type == MTD_MLCNANDFLASH) {
+               pr_err("ubi: refuse attaching mtd%d - MLC NAND is not supported\n",
+                       mtd->index);
+               return -EINVAL;
+       }
+
        if (ubi_num == UBI_DEV_NUM_AUTO) {
                /* Search for an empty slot in the @ubi_devices array */
                for (ubi_num = 0; ubi_num < UBI_MAX_DEVICES; ubi_num++)
index 590d967011bb78704c2e21be194ece15e1e5ce06..98f7d6be8d1fcba102b9c14555536c9e63aef074 100644 (file)
@@ -362,7 +362,6 @@ static void ubi_fastmap_close(struct ubi_device *ubi)
 {
        int i;
 
-       flush_work(&ubi->fm_work);
        return_unused_pool_pebs(ubi, &ubi->fm_pool);
        return_unused_pool_pebs(ubi, &ubi->fm_wl_pool);
 
index c96a92118b8b85272e7c3551dc5de31da3bf8852..32f6d2e24d6692ce00ffe077f0c0bc970c90e825 100644 (file)
@@ -951,9 +951,11 @@ void aq_nic_shutdown(struct aq_nic_s *self)
 
        netif_device_detach(self->ndev);
 
-       err = aq_nic_stop(self);
-       if (err < 0)
-               goto err_exit;
+       if (netif_running(self->ndev)) {
+               err = aq_nic_stop(self);
+               if (err < 0)
+                       goto err_exit;
+       }
        aq_nic_deinit(self);
 
 err_exit:
index 84d7f4dd4ce1a2ebe01c97e00c87ca2a9c73344a..e652d86b87d40eb9c0050c7ce525c2a5e3ab2513 100644 (file)
@@ -48,6 +48,8 @@
 #define FORCE_FLASHLESS 0
 
 static int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual);
+static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self,
+                                     enum hal_atl_utils_fw_state_e state);
 
 int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops)
 {
@@ -247,6 +249,20 @@ int hw_atl_utils_soft_reset(struct aq_hw_s *self)
 
        self->rbl_enabled = (boot_exit_code != 0);
 
+       /* FW 1.x may bootup in an invalid POWER state (WOL feature).
+        * We should work around this by forcing its state back to DEINIT
+        */
+       if (!hw_atl_utils_ver_match(HW_ATL_FW_VER_1X,
+                                   aq_hw_read_reg(self,
+                                                  HW_ATL_MPI_FW_VERSION))) {
+               int err = 0;
+
+               hw_atl_utils_mpi_set_state(self, MPI_DEINIT);
+               AQ_HW_WAIT_FOR((aq_hw_read_reg(self, HW_ATL_MPI_STATE_ADR) &
+                              HW_ATL_MPI_STATE_MSK) == MPI_DEINIT,
+                              10, 1000U);
+       }
+
        if (self->rbl_enabled)
                return hw_atl_utils_soft_reset_rbl(self);
        else
index 1991f0c7bc0eedb13015c9fd94391bea72520fd0..f83769d8047b4d383adebf839bcd88775496cada 100644 (file)
@@ -6090,7 +6090,7 @@ static void bnxt_free_irq(struct bnxt *bp)
        free_irq_cpu_rmap(bp->dev->rx_cpu_rmap);
        bp->dev->rx_cpu_rmap = NULL;
 #endif
-       if (!bp->irq_tbl)
+       if (!bp->irq_tbl || !bp->bnapi)
                return;
 
        for (i = 0; i < bp->cp_nr_rings; i++) {
@@ -7686,6 +7686,8 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
        if (bp->flags & BNXT_FLAG_AGG_RINGS)
                rx_rings <<= 1;
        cp = sh ? max_t(int, tx_rings_needed, rx) : tx_rings_needed + rx;
+       if (bp->flags & BNXT_FLAG_NEW_RM)
+               cp += bnxt_get_ulp_msix_num(bp);
        return bnxt_hwrm_check_rings(bp, tx_rings_needed, rx_rings, rx, cp,
                                     vnics);
 }
index 8d8ccd67e0e27a58b7b1fde4f61dd0765d4b455f..1f622ca2a64f4d2e82557df6efdf87e9c4778df4 100644 (file)
@@ -870,17 +870,22 @@ static int bnxt_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
                         u8 *hfunc)
 {
        struct bnxt *bp = netdev_priv(dev);
-       struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
+       struct bnxt_vnic_info *vnic;
        int i = 0;
 
        if (hfunc)
                *hfunc = ETH_RSS_HASH_TOP;
 
-       if (indir)
+       if (!bp->vnic_info)
+               return 0;
+
+       vnic = &bp->vnic_info[0];
+       if (indir && vnic->rss_table) {
                for (i = 0; i < HW_HASH_INDEX_SIZE; i++)
                        indir[i] = le16_to_cpu(vnic->rss_table[i]);
+       }
 
-       if (key)
+       if (key && vnic->rss_hash_key)
                memcpy(key, vnic->rss_hash_key, HW_HASH_KEY_SIZE);
 
        return 0;
index 65c2cee357669a7a7b5784b3c7b9a3805095304f..795f45024c209e65591a3e9fe60814315ebb3cb0 100644 (file)
@@ -377,6 +377,30 @@ static bool is_wildcard(void *mask, int len)
        return true;
 }
 
+static bool is_exactmatch(void *mask, int len)
+{
+       const u8 *p = mask;
+       int i;
+
+       for (i = 0; i < len; i++)
+               if (p[i] != 0xff)
+                       return false;
+
+       return true;
+}
+
+static bool bits_set(void *key, int len)
+{
+       const u8 *p = key;
+       int i;
+
+       for (i = 0; i < len; i++)
+               if (p[i] != 0)
+                       return true;
+
+       return false;
+}
+
 static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
                                    __le16 ref_flow_handle,
                                    __le32 tunnel_handle, __le16 *flow_handle)
@@ -764,6 +788,41 @@ static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow)
                return false;
        }
 
+       /* Currently source/dest MAC cannot be partial wildcard  */
+       if (bits_set(&flow->l2_key.smac, sizeof(flow->l2_key.smac)) &&
+           !is_exactmatch(flow->l2_mask.smac, sizeof(flow->l2_mask.smac))) {
+               netdev_info(bp->dev, "Wildcard match unsupported for Source MAC\n");
+               return false;
+       }
+       if (bits_set(&flow->l2_key.dmac, sizeof(flow->l2_key.dmac)) &&
+           !is_exactmatch(&flow->l2_mask.dmac, sizeof(flow->l2_mask.dmac))) {
+               netdev_info(bp->dev, "Wildcard match unsupported for Dest MAC\n");
+               return false;
+       }
+
+       /* Currently VLAN fields cannot be partial wildcard */
+       if (bits_set(&flow->l2_key.inner_vlan_tci,
+                    sizeof(flow->l2_key.inner_vlan_tci)) &&
+           !is_exactmatch(&flow->l2_mask.inner_vlan_tci,
+                          sizeof(flow->l2_mask.inner_vlan_tci))) {
+               netdev_info(bp->dev, "Wildcard match unsupported for VLAN TCI\n");
+               return false;
+       }
+       if (bits_set(&flow->l2_key.inner_vlan_tpid,
+                    sizeof(flow->l2_key.inner_vlan_tpid)) &&
+           !is_exactmatch(&flow->l2_mask.inner_vlan_tpid,
+                          sizeof(flow->l2_mask.inner_vlan_tpid))) {
+               netdev_info(bp->dev, "Wildcard match unsupported for VLAN TPID\n");
+               return false;
+       }
+
+       /* Currently Ethertype must be set */
+       if (!is_exactmatch(&flow->l2_mask.ether_type,
+                          sizeof(flow->l2_mask.ether_type))) {
+               netdev_info(bp->dev, "Wildcard match unsupported for Ethertype\n");
+               return false;
+       }
+
        return true;
 }
 
@@ -992,8 +1051,10 @@ static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
 
        /* Check if there's another flow using the same tunnel decap.
         * If not, add this tunnel to the table and resolve the other
-        * tunnel header fileds
+        * tunnel header fileds. Ignore src_port in the tunnel_key,
+        * since it is not required for decap filters.
         */
+       decap_key->tp_src = 0;
        decap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->decap_table,
                                             &tc_info->decap_ht_params,
                                             decap_key);
index 26290403f38fa321e3f7bc397f2d2916857338c7..38f635cf840844b6a92a20587a9fcf7f9382b016 100644 (file)
@@ -64,6 +64,31 @@ static int hwrm_cfa_vfr_free(struct bnxt *bp, u16 vf_idx)
        return rc;
 }
 
+static int bnxt_hwrm_vfr_qcfg(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
+                             u16 *max_mtu)
+{
+       struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+       struct hwrm_func_qcfg_input req = {0};
+       u16 mtu;
+       int rc;
+
+       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1);
+       req.fid = cpu_to_le16(bp->pf.vf[vf_rep->vf_idx].fw_fid);
+
+       mutex_lock(&bp->hwrm_cmd_lock);
+
+       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       if (!rc) {
+               mtu = le16_to_cpu(resp->max_mtu_configured);
+               if (!mtu)
+                       *max_mtu = BNXT_MAX_MTU;
+               else
+                       *max_mtu = mtu;
+       }
+       mutex_unlock(&bp->hwrm_cmd_lock);
+       return rc;
+}
+
 static int bnxt_vf_rep_open(struct net_device *dev)
 {
        struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
@@ -365,6 +390,7 @@ static void bnxt_vf_rep_netdev_init(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
                                    struct net_device *dev)
 {
        struct net_device *pf_dev = bp->dev;
+       u16 max_mtu;
 
        dev->netdev_ops = &bnxt_vf_rep_netdev_ops;
        dev->ethtool_ops = &bnxt_vf_rep_ethtool_ops;
@@ -380,6 +406,10 @@ static void bnxt_vf_rep_netdev_init(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
        bnxt_vf_rep_eth_addr_gen(bp->pf.mac_addr, vf_rep->vf_idx,
                                 dev->perm_addr);
        ether_addr_copy(dev->dev_addr, dev->perm_addr);
+       /* Set VF-Rep's max-mtu to the corresponding VF's max-mtu */
+       if (!bnxt_hwrm_vfr_qcfg(bp, vf_rep, &max_mtu))
+               dev->max_mtu = max_mtu;
+       dev->min_mtu = ETH_ZLEN;
 }
 
 static int bnxt_pcie_dsn_get(struct bnxt *bp, u8 dsn[])
index 5fc46c5a4f368bcbefabb980193b27a738418782..448d1fafc8270eeb8fe0ef37bd0f0cb7530abc96 100644 (file)
@@ -265,14 +265,9 @@ struct nicvf_drv_stats {
 
 struct cavium_ptp;
 
-struct xcast_addr {
-       struct list_head list;
-       u64              addr;
-};
-
 struct xcast_addr_list {
-       struct list_head list;
        int              count;
+       u64              mc[];
 };
 
 struct nicvf_work {
index 1e9a31fef729de76f0b908bedf5ba4f34488d734..707db33043967e00d5c9013bd334ccef7660a8c1 100644 (file)
@@ -1929,7 +1929,7 @@ static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
                                                  work.work);
        struct nicvf *nic = container_of(vf_work, struct nicvf, rx_mode_work);
        union nic_mbx mbx = {};
-       struct xcast_addr *xaddr, *next;
+       int idx;
 
        if (!vf_work)
                return;
@@ -1956,16 +1956,10 @@ static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
        /* check if we have any specific MACs to be added to PF DMAC filter */
        if (vf_work->mc) {
                /* now go through kernel list of MACs and add them one by one */
-               list_for_each_entry_safe(xaddr, next,
-                                        &vf_work->mc->list, list) {
+               for (idx = 0; idx < vf_work->mc->count; idx++) {
                        mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
-                       mbx.xcast.data.mac = xaddr->addr;
+                       mbx.xcast.data.mac = vf_work->mc->mc[idx];
                        nicvf_send_msg_to_pf(nic, &mbx);
-
-                       /* after receiving ACK from PF release memory */
-                       list_del(&xaddr->list);
-                       kfree(xaddr);
-                       vf_work->mc->count--;
                }
                kfree(vf_work->mc);
        }
@@ -1996,17 +1990,15 @@ static void nicvf_set_rx_mode(struct net_device *netdev)
                        mode |= BGX_XCAST_MCAST_FILTER;
                        /* here we need to copy mc addrs */
                        if (netdev_mc_count(netdev)) {
-                               struct xcast_addr *xaddr;
-
-                               mc_list = kmalloc(sizeof(*mc_list), GFP_ATOMIC);
-                               INIT_LIST_HEAD(&mc_list->list);
+                               mc_list = kmalloc(offsetof(typeof(*mc_list),
+                                                          mc[netdev_mc_count(netdev)]),
+                                                 GFP_ATOMIC);
+                               if (unlikely(!mc_list))
+                                       return;
+                               mc_list->count = 0;
                                netdev_hw_addr_list_for_each(ha, &netdev->mc) {
-                                       xaddr = kmalloc(sizeof(*xaddr),
-                                                       GFP_ATOMIC);
-                                       xaddr->addr =
+                                       mc_list->mc[mc_list->count] =
                                                ether_addr_to_u64(ha->addr);
-                                       list_add_tail(&xaddr->list,
-                                                     &mc_list->list);
                                        mc_list->count++;
                                }
                        }
index 80ad16acf0f13ac32e7b09b1b3df05b30df44341..ac2c3f6a12bc6b2f4c3cd75a935271570e375656 100644 (file)
@@ -377,6 +377,38 @@ static const struct of_device_id fsl_pq_mdio_match[] = {
 };
 MODULE_DEVICE_TABLE(of, fsl_pq_mdio_match);
 
+static void set_tbipa(const u32 tbipa_val, struct platform_device *pdev,
+                     uint32_t __iomem * (*get_tbipa)(void __iomem *),
+                     void __iomem *reg_map, struct resource *reg_res)
+{
+       struct device_node *np = pdev->dev.of_node;
+       uint32_t __iomem *tbipa;
+       bool tbipa_mapped;
+
+       tbipa = of_iomap(np, 1);
+       if (tbipa) {
+               tbipa_mapped = true;
+       } else {
+               tbipa_mapped = false;
+               tbipa = (*get_tbipa)(reg_map);
+
+               /*
+                * Add consistency check to make sure TBI is contained within
+                * the mapped range (not because we would get a segfault,
+                * rather to catch bugs in computing TBI address). Print error
+                * message but continue anyway.
+                */
+               if ((void *)tbipa > reg_map + resource_size(reg_res) - 4)
+                       dev_err(&pdev->dev, "invalid register map (should be at least 0x%04zx to contain TBI address)\n",
+                               ((void *)tbipa - reg_map) + 4);
+       }
+
+       iowrite32be(be32_to_cpu(tbipa_val), tbipa);
+
+       if (tbipa_mapped)
+               iounmap(tbipa);
+}
+
 static int fsl_pq_mdio_probe(struct platform_device *pdev)
 {
        const struct of_device_id *id =
@@ -450,8 +482,6 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev)
 
                if (tbi) {
                        const u32 *prop = of_get_property(tbi, "reg", NULL);
-                       uint32_t __iomem *tbipa;
-
                        if (!prop) {
                                dev_err(&pdev->dev,
                                        "missing 'reg' property in node %pOF\n",
@@ -459,20 +489,8 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev)
                                err = -EBUSY;
                                goto error;
                        }
-
-                       tbipa = data->get_tbipa(priv->map);
-
-                       /*
-                        * Add consistency check to make sure TBI is contained
-                        * within the mapped range (not because we would get a
-                        * segfault, rather to catch bugs in computing TBI
-                        * address). Print error message but continue anyway.
-                        */
-                       if ((void *)tbipa > priv->map + resource_size(&res) - 4)
-                               dev_err(&pdev->dev, "invalid register map (should be at least 0x%04zx to contain TBI address)\n",
-                                       ((void *)tbipa - priv->map) + 4);
-
-                       iowrite32be(be32_to_cpup(prop), tbipa);
+                       set_tbipa(*prop, pdev,
+                                 data->get_tbipa, priv->map, &res);
                }
        }
 
index b492af6affc3fd4de748988d38691b60810feaf2..aad5658d79d5f74424a784bbe55ef3df9bc567b7 100644 (file)
@@ -118,6 +118,7 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
 static int ibmvnic_init(struct ibmvnic_adapter *);
 static void release_crq_queue(struct ibmvnic_adapter *);
 static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p);
+static int init_crq_queue(struct ibmvnic_adapter *adapter);
 
 struct ibmvnic_stat {
        char name[ETH_GSTRING_LEN];
@@ -320,18 +321,16 @@ failure:
        dev_info(dev, "replenish pools failure\n");
        pool->free_map[pool->next_free] = index;
        pool->rx_buff[index].skb = NULL;
-       if (!dma_mapping_error(dev, dma_addr))
-               dma_unmap_single(dev, dma_addr, pool->buff_size,
-                                DMA_FROM_DEVICE);
 
        dev_kfree_skb_any(skb);
        adapter->replenish_add_buff_failure++;
        atomic_add(buffers_added, &pool->available);
 
-       if (lpar_rc == H_CLOSED) {
+       if (lpar_rc == H_CLOSED || adapter->failover_pending) {
                /* Disable buffer pool replenishment and report carrier off if
-                * queue is closed. Firmware guarantees that a signal will
-                * be sent to the driver, triggering a reset.
+                * queue is closed or pending failover.
+                * Firmware guarantees that a signal will be sent to the
+                * driver, triggering a reset.
                 */
                deactivate_rx_pools(adapter);
                netif_carrier_off(adapter->netdev);
@@ -1071,6 +1070,14 @@ static int ibmvnic_open(struct net_device *netdev)
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
        int rc;
 
+       /* If device failover is pending, just set device state and return.
+        * Device operation will be handled by reset routine.
+        */
+       if (adapter->failover_pending) {
+               adapter->state = VNIC_OPEN;
+               return 0;
+       }
+
        mutex_lock(&adapter->reset_lock);
 
        if (adapter->state != VNIC_CLOSED) {
@@ -1218,7 +1225,6 @@ static int __ibmvnic_close(struct net_device *netdev)
        rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
        if (rc)
                return rc;
-       ibmvnic_cleanup(netdev);
        adapter->state = VNIC_CLOSED;
        return 0;
 }
@@ -1228,8 +1234,17 @@ static int ibmvnic_close(struct net_device *netdev)
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
        int rc;
 
+       /* If device failover is pending, just set device state and return.
+        * Device operation will be handled by reset routine.
+        */
+       if (adapter->failover_pending) {
+               adapter->state = VNIC_CLOSED;
+               return 0;
+       }
+
        mutex_lock(&adapter->reset_lock);
        rc = __ibmvnic_close(netdev);
+       ibmvnic_cleanup(netdev);
        mutex_unlock(&adapter->reset_lock);
 
        return rc;
@@ -1562,8 +1577,9 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
                dev_kfree_skb_any(skb);
                tx_buff->skb = NULL;
 
-               if (lpar_rc == H_CLOSED) {
-                       /* Disable TX and report carrier off if queue is closed.
+               if (lpar_rc == H_CLOSED || adapter->failover_pending) {
+                       /* Disable TX and report carrier off if queue is closed
+                        * or pending failover.
                         * Firmware guarantees that a signal will be sent to the
                         * driver, triggering a reset or some other action.
                         */
@@ -1711,14 +1727,10 @@ static int do_reset(struct ibmvnic_adapter *adapter,
        old_num_rx_queues = adapter->req_rx_queues;
        old_num_tx_queues = adapter->req_tx_queues;
 
-       if (rwi->reset_reason == VNIC_RESET_MOBILITY) {
-               rc = ibmvnic_reenable_crq_queue(adapter);
-               if (rc)
-                       return 0;
-               ibmvnic_cleanup(netdev);
-       } else if (rwi->reset_reason == VNIC_RESET_FAILOVER) {
-               ibmvnic_cleanup(netdev);
-       } else {
+       ibmvnic_cleanup(netdev);
+
+       if (adapter->reset_reason != VNIC_RESET_MOBILITY &&
+           adapter->reset_reason != VNIC_RESET_FAILOVER) {
                rc = __ibmvnic_close(netdev);
                if (rc)
                        return rc;
@@ -1737,6 +1749,23 @@ static int do_reset(struct ibmvnic_adapter *adapter,
                 */
                adapter->state = VNIC_PROBED;
 
+               if (adapter->wait_for_reset) {
+                       rc = init_crq_queue(adapter);
+               } else if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
+                       rc = ibmvnic_reenable_crq_queue(adapter);
+                       release_sub_crqs(adapter, 1);
+               } else {
+                       rc = ibmvnic_reset_crq(adapter);
+                       if (!rc)
+                               rc = vio_enable_interrupts(adapter->vdev);
+               }
+
+               if (rc) {
+                       netdev_err(adapter->netdev,
+                                  "Couldn't initialize crq. rc=%d\n", rc);
+                       return rc;
+               }
+
                rc = ibmvnic_init(adapter);
                if (rc)
                        return IBMVNIC_INIT_FAILED;
@@ -1878,23 +1907,26 @@ static void __ibmvnic_reset(struct work_struct *work)
        mutex_unlock(&adapter->reset_lock);
 }
 
-static void ibmvnic_reset(struct ibmvnic_adapter *adapter,
-                         enum ibmvnic_reset_reason reason)
+static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
+                        enum ibmvnic_reset_reason reason)
 {
        struct ibmvnic_rwi *rwi, *tmp;
        struct net_device *netdev = adapter->netdev;
        struct list_head *entry;
+       int ret;
 
        if (adapter->state == VNIC_REMOVING ||
-           adapter->state == VNIC_REMOVED) {
-               netdev_dbg(netdev, "Adapter removing, skipping reset\n");
-               return;
+           adapter->state == VNIC_REMOVED ||
+           adapter->failover_pending) {
+               ret = EBUSY;
+               netdev_dbg(netdev, "Adapter removing or pending failover, skipping reset\n");
+               goto err;
        }
 
        if (adapter->state == VNIC_PROBING) {
                netdev_warn(netdev, "Adapter reset during probe\n");
-               adapter->init_done_rc = EAGAIN;
-               return;
+               ret = adapter->init_done_rc = EAGAIN;
+               goto err;
        }
 
        mutex_lock(&adapter->rwi_lock);
@@ -1904,7 +1936,8 @@ static void ibmvnic_reset(struct ibmvnic_adapter *adapter,
                if (tmp->reset_reason == reason) {
                        netdev_dbg(netdev, "Skipping matching reset\n");
                        mutex_unlock(&adapter->rwi_lock);
-                       return;
+                       ret = EBUSY;
+                       goto err;
                }
        }
 
@@ -1912,7 +1945,8 @@ static void ibmvnic_reset(struct ibmvnic_adapter *adapter,
        if (!rwi) {
                mutex_unlock(&adapter->rwi_lock);
                ibmvnic_close(netdev);
-               return;
+               ret = ENOMEM;
+               goto err;
        }
 
        rwi->reset_reason = reason;
@@ -1921,6 +1955,12 @@ static void ibmvnic_reset(struct ibmvnic_adapter *adapter,
 
        netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason);
        schedule_work(&adapter->ibmvnic_reset);
+
+       return 0;
+err:
+       if (adapter->wait_for_reset)
+               adapter->wait_for_reset = false;
+       return -ret;
 }
 
 static void ibmvnic_tx_timeout(struct net_device *dev)
@@ -2055,6 +2095,8 @@ static void ibmvnic_netpoll_controller(struct net_device *dev)
 
 static int wait_for_reset(struct ibmvnic_adapter *adapter)
 {
+       int rc, ret;
+
        adapter->fallback.mtu = adapter->req_mtu;
        adapter->fallback.rx_queues = adapter->req_rx_queues;
        adapter->fallback.tx_queues = adapter->req_tx_queues;
@@ -2062,11 +2104,15 @@ static int wait_for_reset(struct ibmvnic_adapter *adapter)
        adapter->fallback.tx_entries = adapter->req_tx_entries_per_subcrq;
 
        init_completion(&adapter->reset_done);
-       ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM);
        adapter->wait_for_reset = true;
+       rc = ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM);
+       if (rc)
+               return rc;
        wait_for_completion(&adapter->reset_done);
 
+       ret = 0;
        if (adapter->reset_done_rc) {
+               ret = -EIO;
                adapter->desired.mtu = adapter->fallback.mtu;
                adapter->desired.rx_queues = adapter->fallback.rx_queues;
                adapter->desired.tx_queues = adapter->fallback.tx_queues;
@@ -2074,12 +2120,15 @@ static int wait_for_reset(struct ibmvnic_adapter *adapter)
                adapter->desired.tx_entries = adapter->fallback.tx_entries;
 
                init_completion(&adapter->reset_done);
-               ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM);
+               adapter->wait_for_reset = true;
+               rc = ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM);
+               if (rc)
+                       return ret;
                wait_for_completion(&adapter->reset_done);
        }
        adapter->wait_for_reset = false;
 
-       return adapter->reset_done_rc;
+       return ret;
 }
 
 static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu)
@@ -2364,6 +2413,7 @@ static int reset_one_sub_crq_queue(struct ibmvnic_adapter *adapter,
        }
 
        memset(scrq->msgs, 0, 4 * PAGE_SIZE);
+       atomic_set(&scrq->used, 0);
        scrq->cur = 0;
 
        rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token,
@@ -2574,7 +2624,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
        union sub_crq *next;
        int index;
        int i, j;
-       u8 first;
+       u8 *first;
 
 restart_loop:
        while (pending_scrq(adapter, scrq)) {
@@ -2605,11 +2655,12 @@ restart_loop:
                                txbuff->data_dma[j] = 0;
                        }
                        /* if sub_crq was sent indirectly */
-                       first = txbuff->indir_arr[0].generic.first;
-                       if (first == IBMVNIC_CRQ_CMD) {
+                       first = &txbuff->indir_arr[0].generic.first;
+                       if (*first == IBMVNIC_CRQ_CMD) {
                                dma_unmap_single(dev, txbuff->indir_dma,
                                                 sizeof(txbuff->indir_arr),
                                                 DMA_TO_DEVICE);
+                               *first = 0;
                        }
 
                        if (txbuff->last_frag) {
@@ -3882,9 +3933,9 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
        int i;
 
        dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz,
-                        DMA_BIDIRECTIONAL);
+                        DMA_TO_DEVICE);
        dma_unmap_single(dev, adapter->login_rsp_buf_token,
-                        adapter->login_rsp_buf_sz, DMA_BIDIRECTIONAL);
+                        adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
 
        /* If the number of queues requested can't be allocated by the
         * server, the login response will return with code 1. We will need
@@ -4144,7 +4195,9 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
                case IBMVNIC_CRQ_INIT:
                        dev_info(dev, "Partner initialized\n");
                        adapter->from_passive_init = true;
+                       adapter->failover_pending = false;
                        complete(&adapter->init_done);
+                       ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
                        break;
                case IBMVNIC_CRQ_INIT_COMPLETE:
                        dev_info(dev, "Partner initialization complete\n");
@@ -4161,7 +4214,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
                        ibmvnic_reset(adapter, VNIC_RESET_MOBILITY);
                } else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) {
                        dev_info(dev, "Backing device failover detected\n");
-                       ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
+                       adapter->failover_pending = true;
                } else {
                        /* The adapter lost the connection */
                        dev_err(dev, "Virtual Adapter failed (rc=%d)\n",
@@ -4461,19 +4514,6 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
        u64 old_num_rx_queues, old_num_tx_queues;
        int rc;
 
-       if (adapter->resetting && !adapter->wait_for_reset) {
-               rc = ibmvnic_reset_crq(adapter);
-               if (!rc)
-                       rc = vio_enable_interrupts(adapter->vdev);
-       } else {
-               rc = init_crq_queue(adapter);
-       }
-
-       if (rc) {
-               dev_err(dev, "Couldn't initialize crq. rc=%d\n", rc);
-               return rc;
-       }
-
        adapter->from_passive_init = false;
 
        old_num_rx_queues = adapter->req_rx_queues;
@@ -4498,7 +4538,8 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
                return -1;
        }
 
-       if (adapter->resetting && !adapter->wait_for_reset) {
+       if (adapter->resetting && !adapter->wait_for_reset &&
+           adapter->reset_reason != VNIC_RESET_MOBILITY) {
                if (adapter->req_rx_queues != old_num_rx_queues ||
                    adapter->req_tx_queues != old_num_tx_queues) {
                        release_sub_crqs(adapter, 0);
@@ -4586,6 +4627,13 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
        adapter->mac_change_pending = false;
 
        do {
+               rc = init_crq_queue(adapter);
+               if (rc) {
+                       dev_err(&dev->dev, "Couldn't initialize crq. rc=%d\n",
+                               rc);
+                       goto ibmvnic_init_fail;
+               }
+
                rc = ibmvnic_init(adapter);
                if (rc && rc != EAGAIN)
                        goto ibmvnic_init_fail;
index 89efe700eafe796667147b58bbe617cc57761722..99c0b58c2c39258564ba55a6527786c6f7c0be5d 100644 (file)
@@ -1108,6 +1108,7 @@ struct ibmvnic_adapter {
        bool napi_enabled, from_passive_init;
 
        bool mac_change_pending;
+       bool failover_pending;
 
        struct ibmvnic_tunables desired;
        struct ibmvnic_tunables fallback;
index 385f5d425d19cbe4379edb52aa2e96f99f6135e8..21977ec984c4a1f193d7b68f3dce278edae02986 100644 (file)
@@ -468,8 +468,10 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
        mac_buf_len = sizeof(struct ice_aqc_manage_mac_read_resp);
        mac_buf = devm_kzalloc(ice_hw_to_dev(hw), mac_buf_len, GFP_KERNEL);
 
-       if (!mac_buf)
+       if (!mac_buf) {
+               status = ICE_ERR_NO_MEMORY;
                goto err_unroll_fltr_mgmt_struct;
+       }
 
        status = ice_aq_manage_mac_read(hw, mac_buf, mac_buf_len, NULL);
        devm_kfree(ice_hw_to_dev(hw), mac_buf);
index 186764a5c2636691ce81da4cbd42791186c69573..1db304c01d100604560a5d4ac8216b093a7f92d6 100644 (file)
@@ -156,7 +156,7 @@ ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
 
 static int ice_get_regs_len(struct net_device __always_unused *netdev)
 {
-       return ARRAY_SIZE(ice_regs_dump_list);
+       return sizeof(ice_regs_dump_list);
 }
 
 static void
@@ -170,7 +170,7 @@ ice_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p)
 
        regs->version = 1;
 
-       for (i = 0; i < ARRAY_SIZE(ice_regs_dump_list) / sizeof(u32); ++i)
+       for (i = 0; i < ARRAY_SIZE(ice_regs_dump_list); ++i)
                regs_buf[i] = rd32(hw, ice_regs_dump_list[i]);
 }
 
index 7fc1bbf51c44452380d6ff19d3f9843df5e5027e..54a038943c0649f74f72c8bdb5120d9c285ce8d2 100644 (file)
@@ -1604,7 +1604,7 @@ static int mvpp2_prs_init_from_hw(struct mvpp2 *priv,
 {
        int i;
 
-       if (pe->index > MVPP2_PRS_TCAM_SRAM_SIZE - 1)
+       if (tid > MVPP2_PRS_TCAM_SRAM_SIZE - 1)
                return -EINVAL;
 
        memset(pe, 0, sizeof(*pe));
index 53fffd09d133817011da2d620813de96a5460bf2..ca38a30fbe913c6126f1dab6afad79fae193887b 100644 (file)
@@ -3805,18 +3805,6 @@ static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
        },
 };
 
-static u64 mlxsw_sp_resource_kvd_linear_occ_get(struct devlink *devlink)
-{
-       struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-       struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
-
-       return mlxsw_sp_kvdl_occ_get(mlxsw_sp);
-}
-
-static const struct devlink_resource_ops mlxsw_sp_resource_kvd_linear_ops = {
-       .occ_get = mlxsw_sp_resource_kvd_linear_occ_get,
-};
-
 static void
 mlxsw_sp_resource_size_params_prepare(struct mlxsw_core *mlxsw_core,
                                      struct devlink_resource_size_params *kvd_size_params,
@@ -3877,8 +3865,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
        err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD,
                                        kvd_size, MLXSW_SP_RESOURCE_KVD,
                                        DEVLINK_RESOURCE_ID_PARENT_TOP,
-                                       &kvd_size_params,
-                                       NULL);
+                                       &kvd_size_params);
        if (err)
                return err;
 
@@ -3887,8 +3874,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
                                        linear_size,
                                        MLXSW_SP_RESOURCE_KVD_LINEAR,
                                        MLXSW_SP_RESOURCE_KVD,
-                                       &linear_size_params,
-                                       &mlxsw_sp_resource_kvd_linear_ops);
+                                       &linear_size_params);
        if (err)
                return err;
 
@@ -3905,8 +3891,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
                                        double_size,
                                        MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
                                        MLXSW_SP_RESOURCE_KVD,
-                                       &hash_double_size_params,
-                                       NULL);
+                                       &hash_double_size_params);
        if (err)
                return err;
 
@@ -3915,8 +3900,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
                                        single_size,
                                        MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
                                        MLXSW_SP_RESOURCE_KVD,
-                                       &hash_single_size_params,
-                                       NULL);
+                                       &hash_single_size_params);
        if (err)
                return err;
 
index 82820ba43728dc113bc28158d10811169d2a9ea4..804d4d2c80318310b09d8174c2ec82794e6e2865 100644 (file)
@@ -442,7 +442,6 @@ void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index);
 int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
                                   unsigned int entry_count,
                                   unsigned int *p_alloc_size);
-u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp);
 int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core);
 
 struct mlxsw_sp_acl_rule_info {
index 8796db44dcc391e256c3454a8fd99444ffcade93..fe4327f547d23b1caa5138cfe1544f05af2511d3 100644 (file)
@@ -315,8 +315,9 @@ static u64 mlxsw_sp_kvdl_part_occ(struct mlxsw_sp_kvdl_part *part)
        return occ;
 }
 
-u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp)
+static u64 mlxsw_sp_kvdl_occ_get(void *priv)
 {
+       const struct mlxsw_sp *mlxsw_sp = priv;
        u64 occ = 0;
        int i;
 
@@ -326,48 +327,33 @@ u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp)
        return occ;
 }
 
-static u64 mlxsw_sp_kvdl_single_occ_get(struct devlink *devlink)
+static u64 mlxsw_sp_kvdl_single_occ_get(void *priv)
 {
-       struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-       struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+       const struct mlxsw_sp *mlxsw_sp = priv;
        struct mlxsw_sp_kvdl_part *part;
 
        part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_SINGLE];
        return mlxsw_sp_kvdl_part_occ(part);
 }
 
-static u64 mlxsw_sp_kvdl_chunks_occ_get(struct devlink *devlink)
+static u64 mlxsw_sp_kvdl_chunks_occ_get(void *priv)
 {
-       struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-       struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+       const struct mlxsw_sp *mlxsw_sp = priv;
        struct mlxsw_sp_kvdl_part *part;
 
        part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_CHUNKS];
        return mlxsw_sp_kvdl_part_occ(part);
 }
 
-static u64 mlxsw_sp_kvdl_large_chunks_occ_get(struct devlink *devlink)
+static u64 mlxsw_sp_kvdl_large_chunks_occ_get(void *priv)
 {
-       struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-       struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+       const struct mlxsw_sp *mlxsw_sp = priv;
        struct mlxsw_sp_kvdl_part *part;
 
        part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_LARGE_CHUNKS];
        return mlxsw_sp_kvdl_part_occ(part);
 }
 
-static const struct devlink_resource_ops mlxsw_sp_kvdl_single_ops = {
-       .occ_get = mlxsw_sp_kvdl_single_occ_get,
-};
-
-static const struct devlink_resource_ops mlxsw_sp_kvdl_chunks_ops = {
-       .occ_get = mlxsw_sp_kvdl_chunks_occ_get,
-};
-
-static const struct devlink_resource_ops mlxsw_sp_kvdl_chunks_large_ops = {
-       .occ_get = mlxsw_sp_kvdl_large_chunks_occ_get,
-};
-
 int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core)
 {
        struct devlink *devlink = priv_to_devlink(mlxsw_core);
@@ -386,8 +372,7 @@ int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core)
                                        MLXSW_SP_KVDL_SINGLE_SIZE,
                                        MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
                                        MLXSW_SP_RESOURCE_KVD_LINEAR,
-                                       &size_params,
-                                       &mlxsw_sp_kvdl_single_ops);
+                                       &size_params);
        if (err)
                return err;
 
@@ -398,8 +383,7 @@ int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core)
                                        MLXSW_SP_KVDL_CHUNKS_SIZE,
                                        MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
                                        MLXSW_SP_RESOURCE_KVD_LINEAR,
-                                       &size_params,
-                                       &mlxsw_sp_kvdl_chunks_ops);
+                                       &size_params);
        if (err)
                return err;
 
@@ -410,13 +394,13 @@ int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core)
                                        MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE,
                                        MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
                                        MLXSW_SP_RESOURCE_KVD_LINEAR,
-                                       &size_params,
-                                       &mlxsw_sp_kvdl_chunks_large_ops);
+                                       &size_params);
        return err;
 }
 
 int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp)
 {
+       struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
        struct mlxsw_sp_kvdl *kvdl;
        int err;
 
@@ -429,6 +413,23 @@ int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp)
        if (err)
                goto err_kvdl_parts_init;
 
+       devlink_resource_occ_get_register(devlink,
+                                         MLXSW_SP_RESOURCE_KVD_LINEAR,
+                                         mlxsw_sp_kvdl_occ_get,
+                                         mlxsw_sp);
+       devlink_resource_occ_get_register(devlink,
+                                         MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+                                         mlxsw_sp_kvdl_single_occ_get,
+                                         mlxsw_sp);
+       devlink_resource_occ_get_register(devlink,
+                                         MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+                                         mlxsw_sp_kvdl_chunks_occ_get,
+                                         mlxsw_sp);
+       devlink_resource_occ_get_register(devlink,
+                                         MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
+                                         mlxsw_sp_kvdl_large_chunks_occ_get,
+                                         mlxsw_sp);
+
        return 0;
 
 err_kvdl_parts_init:
@@ -438,6 +439,16 @@ err_kvdl_parts_init:
 
 void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp)
 {
+       struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+       devlink_resource_occ_get_unregister(devlink,
+                                           MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS);
+       devlink_resource_occ_get_unregister(devlink,
+                                           MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS);
+       devlink_resource_occ_get_unregister(devlink,
+                                           MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE);
+       devlink_resource_occ_get_unregister(devlink,
+                                           MLXSW_SP_RESOURCE_KVD_LINEAR);
        mlxsw_sp_kvdl_parts_fini(mlxsw_sp);
        kfree(mlxsw_sp->kvdl);
 }
index 9c2567b0d93e95eb3eace210e439c6ddf51a28f3..dfad93fca0a61c1d353ad4d8527751a930f517ef 100644 (file)
@@ -375,7 +375,7 @@ static int efx_mcdi_poll(struct efx_nic *efx)
         * because generally mcdi responses are fast. After that, back off
         * and poll once a jiffy (approximately)
         */
-       spins = TICK_USEC;
+       spins = USER_TICK_USEC;
        finish = jiffies + MCDI_RPC_TIMEOUT;
 
        while (1) {
index c9910c33e671f3f161f83249491d63e557274bbf..04f611e6f678f69b124037b392c7bf060dd686b0 100644 (file)
@@ -109,11 +109,11 @@ static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
        call_rcu(&nvdev->rcu, free_netvsc_device);
 }
 
-static void netvsc_revoke_buf(struct hv_device *device,
-                             struct netvsc_device *net_device)
+static void netvsc_revoke_recv_buf(struct hv_device *device,
+                                  struct netvsc_device *net_device,
+                                  struct net_device *ndev)
 {
        struct nvsp_message *revoke_packet;
-       struct net_device *ndev = hv_get_drvdata(device);
        int ret;
 
        /*
@@ -157,6 +157,14 @@ static void netvsc_revoke_buf(struct hv_device *device,
                }
                net_device->recv_section_cnt = 0;
        }
+}
+
+static void netvsc_revoke_send_buf(struct hv_device *device,
+                                  struct netvsc_device *net_device,
+                                  struct net_device *ndev)
+{
+       struct nvsp_message *revoke_packet;
+       int ret;
 
        /* Deal with the send buffer we may have setup.
         * If we got a  send section size, it means we received a
@@ -202,10 +210,10 @@ static void netvsc_revoke_buf(struct hv_device *device,
        }
 }
 
-static void netvsc_teardown_gpadl(struct hv_device *device,
-                                 struct netvsc_device *net_device)
+static void netvsc_teardown_recv_gpadl(struct hv_device *device,
+                                      struct netvsc_device *net_device,
+                                      struct net_device *ndev)
 {
-       struct net_device *ndev = hv_get_drvdata(device);
        int ret;
 
        if (net_device->recv_buf_gpadl_handle) {
@@ -222,6 +230,13 @@ static void netvsc_teardown_gpadl(struct hv_device *device,
                }
                net_device->recv_buf_gpadl_handle = 0;
        }
+}
+
+static void netvsc_teardown_send_gpadl(struct hv_device *device,
+                                      struct netvsc_device *net_device,
+                                      struct net_device *ndev)
+{
+       int ret;
 
        if (net_device->send_buf_gpadl_handle) {
                ret = vmbus_teardown_gpadl(device->channel,
@@ -437,8 +452,10 @@ static int netvsc_init_buf(struct hv_device *device,
        goto exit;
 
 cleanup:
-       netvsc_revoke_buf(device, net_device);
-       netvsc_teardown_gpadl(device, net_device);
+       netvsc_revoke_recv_buf(device, net_device, ndev);
+       netvsc_revoke_send_buf(device, net_device, ndev);
+       netvsc_teardown_recv_gpadl(device, net_device, ndev);
+       netvsc_teardown_send_gpadl(device, net_device, ndev);
 
 exit:
        return ret;
@@ -457,7 +474,6 @@ static int negotiate_nvsp_ver(struct hv_device *device,
        init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
        init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
        init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
-
        trace_nvsp_send(ndev, init_packet);
 
        /* Send the init request */
@@ -575,7 +591,17 @@ void netvsc_device_remove(struct hv_device *device)
                = rtnl_dereference(net_device_ctx->nvdev);
        int i;
 
-       netvsc_revoke_buf(device, net_device);
+       /*
+        * Revoke receive buffer. If host is pre-Win2016 then tear down
+        * receive buffer GPADL. Do the same for send buffer.
+        */
+       netvsc_revoke_recv_buf(device, net_device, ndev);
+       if (vmbus_proto_version < VERSION_WIN10)
+               netvsc_teardown_recv_gpadl(device, net_device, ndev);
+
+       netvsc_revoke_send_buf(device, net_device, ndev);
+       if (vmbus_proto_version < VERSION_WIN10)
+               netvsc_teardown_send_gpadl(device, net_device, ndev);
 
        RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
 
@@ -589,15 +615,17 @@ void netvsc_device_remove(struct hv_device *device)
         */
        netdev_dbg(ndev, "net device safe to remove\n");
 
-       /* older versions require that buffer be revoked before close */
-       if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_4)
-               netvsc_teardown_gpadl(device, net_device);
-
        /* Now, we can close the channel safely */
        vmbus_close(device->channel);
 
-       if (net_device->nvsp_version >= NVSP_PROTOCOL_VERSION_4)
-               netvsc_teardown_gpadl(device, net_device);
+       /*
+        * If host is Win2016 or higher then we do the GPADL tear down
+        * here after VMBus is closed.
+       */
+       if (vmbus_proto_version >= VERSION_WIN10) {
+               netvsc_teardown_recv_gpadl(device, net_device, ndev);
+               netvsc_teardown_send_gpadl(device, net_device, ndev);
+       }
 
        /* Release all resources */
        free_netvsc_device_rcu(net_device);
index 1dba4793645649a01200b7daf7261a8877ed9bbe..bef7db5d129a55c31b6d0f2a5094c7b61b61b1d5 100644 (file)
@@ -30,52 +30,36 @@ static struct net *nsim_devlink_net(struct devlink *devlink)
 
 /* IPv4
  */
-static u64 nsim_ipv4_fib_resource_occ_get(struct devlink *devlink)
+static u64 nsim_ipv4_fib_resource_occ_get(void *priv)
 {
-       struct net *net = nsim_devlink_net(devlink);
+       struct net *net = priv;
 
        return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, false);
 }
 
-static struct devlink_resource_ops nsim_ipv4_fib_res_ops = {
-       .occ_get = nsim_ipv4_fib_resource_occ_get,
-};
-
-static u64 nsim_ipv4_fib_rules_res_occ_get(struct devlink *devlink)
+static u64 nsim_ipv4_fib_rules_res_occ_get(void *priv)
 {
-       struct net *net = nsim_devlink_net(devlink);
+       struct net *net = priv;
 
        return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, false);
 }
 
-static struct devlink_resource_ops nsim_ipv4_fib_rules_res_ops = {
-       .occ_get = nsim_ipv4_fib_rules_res_occ_get,
-};
-
 /* IPv6
  */
-static u64 nsim_ipv6_fib_resource_occ_get(struct devlink *devlink)
+static u64 nsim_ipv6_fib_resource_occ_get(void *priv)
 {
-       struct net *net = nsim_devlink_net(devlink);
+       struct net *net = priv;
 
        return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, false);
 }
 
-static struct devlink_resource_ops nsim_ipv6_fib_res_ops = {
-       .occ_get = nsim_ipv6_fib_resource_occ_get,
-};
-
-static u64 nsim_ipv6_fib_rules_res_occ_get(struct devlink *devlink)
+static u64 nsim_ipv6_fib_rules_res_occ_get(void *priv)
 {
-       struct net *net = nsim_devlink_net(devlink);
+       struct net *net = priv;
 
        return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, false);
 }
 
-static struct devlink_resource_ops nsim_ipv6_fib_rules_res_ops = {
-       .occ_get = nsim_ipv6_fib_rules_res_occ_get,
-};
-
 static int devlink_resources_register(struct devlink *devlink)
 {
        struct devlink_resource_size_params params = {
@@ -91,7 +75,7 @@ static int devlink_resources_register(struct devlink *devlink)
        err = devlink_resource_register(devlink, "IPv4", (u64)-1,
                                        NSIM_RESOURCE_IPV4,
                                        DEVLINK_RESOURCE_ID_PARENT_TOP,
-                                       &params, NULL);
+                                       &params);
        if (err) {
                pr_err("Failed to register IPv4 top resource\n");
                goto out;
@@ -100,8 +84,7 @@ static int devlink_resources_register(struct devlink *devlink)
        n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, true);
        err = devlink_resource_register(devlink, "fib", n,
                                        NSIM_RESOURCE_IPV4_FIB,
-                                       NSIM_RESOURCE_IPV4,
-                                       &params, &nsim_ipv4_fib_res_ops);
+                                       NSIM_RESOURCE_IPV4, &params);
        if (err) {
                pr_err("Failed to register IPv4 FIB resource\n");
                return err;
@@ -110,8 +93,7 @@ static int devlink_resources_register(struct devlink *devlink)
        n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, true);
        err = devlink_resource_register(devlink, "fib-rules", n,
                                        NSIM_RESOURCE_IPV4_FIB_RULES,
-                                       NSIM_RESOURCE_IPV4,
-                                       &params, &nsim_ipv4_fib_rules_res_ops);
+                                       NSIM_RESOURCE_IPV4, &params);
        if (err) {
                pr_err("Failed to register IPv4 FIB rules resource\n");
                return err;
@@ -121,7 +103,7 @@ static int devlink_resources_register(struct devlink *devlink)
        err = devlink_resource_register(devlink, "IPv6", (u64)-1,
                                        NSIM_RESOURCE_IPV6,
                                        DEVLINK_RESOURCE_ID_PARENT_TOP,
-                                       &params, NULL);
+                                       &params);
        if (err) {
                pr_err("Failed to register IPv6 top resource\n");
                goto out;
@@ -130,8 +112,7 @@ static int devlink_resources_register(struct devlink *devlink)
        n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, true);
        err = devlink_resource_register(devlink, "fib", n,
                                        NSIM_RESOURCE_IPV6_FIB,
-                                       NSIM_RESOURCE_IPV6,
-                                       &params, &nsim_ipv6_fib_res_ops);
+                                       NSIM_RESOURCE_IPV6, &params);
        if (err) {
                pr_err("Failed to register IPv6 FIB resource\n");
                return err;
@@ -140,12 +121,28 @@ static int devlink_resources_register(struct devlink *devlink)
        n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, true);
        err = devlink_resource_register(devlink, "fib-rules", n,
                                        NSIM_RESOURCE_IPV6_FIB_RULES,
-                                       NSIM_RESOURCE_IPV6,
-                                       &params, &nsim_ipv6_fib_rules_res_ops);
+                                       NSIM_RESOURCE_IPV6, &params);
        if (err) {
                pr_err("Failed to register IPv6 FIB rules resource\n");
                return err;
        }
+
+       devlink_resource_occ_get_register(devlink,
+                                         NSIM_RESOURCE_IPV4_FIB,
+                                         nsim_ipv4_fib_resource_occ_get,
+                                         net);
+       devlink_resource_occ_get_register(devlink,
+                                         NSIM_RESOURCE_IPV4_FIB_RULES,
+                                         nsim_ipv4_fib_rules_res_occ_get,
+                                         net);
+       devlink_resource_occ_get_register(devlink,
+                                         NSIM_RESOURCE_IPV6_FIB,
+                                         nsim_ipv6_fib_resource_occ_get,
+                                         net);
+       devlink_resource_occ_get_register(devlink,
+                                         NSIM_RESOURCE_IPV6_FIB_RULES,
+                                         nsim_ipv6_fib_rules_res_occ_get,
+                                         net);
 out:
        return err;
 }
index 654f42d000926d420fbcfd6314efcf332b9a5a0a..a6c87793d899f325b11768a0f7334ede655a7807 100644 (file)
@@ -1207,6 +1207,23 @@ static void dp83640_remove(struct phy_device *phydev)
        kfree(dp83640);
 }
 
+static int dp83640_soft_reset(struct phy_device *phydev)
+{
+       int ret;
+
+       ret = genphy_soft_reset(phydev);
+       if (ret < 0)
+               return ret;
+
+       /* From DP83640 datasheet: "Software driver code must wait 3 us
+        * following a software reset before allowing further serial MII
+        * operations with the DP83640."
+        */
+       udelay(10);             /* Taking udelay inaccuracy into account */
+
+       return 0;
+}
+
 static int dp83640_config_init(struct phy_device *phydev)
 {
        struct dp83640_private *dp83640 = phydev->priv;
@@ -1501,6 +1518,7 @@ static struct phy_driver dp83640_driver = {
        .flags          = PHY_HAS_INTERRUPT,
        .probe          = dp83640_probe,
        .remove         = dp83640_remove,
+       .soft_reset     = dp83640_soft_reset,
        .config_init    = dp83640_config_init,
        .ack_interrupt  = dp83640_ack_interrupt,
        .config_intr    = dp83640_config_intr,
index a75c511950c331643108b03a17a7d0aee1327a3d..c22e8e38324793024d05ce6df15fc9274a5d7fdc 100644 (file)
@@ -828,6 +828,22 @@ static int m88e1121_config_init(struct phy_device *phydev)
        return marvell_config_init(phydev);
 }
 
+static int m88e1318_config_init(struct phy_device *phydev)
+{
+       if (phy_interrupt_is_valid(phydev)) {
+               int err = phy_modify_paged(
+                       phydev, MII_MARVELL_LED_PAGE,
+                       MII_88E1318S_PHY_LED_TCR,
+                       MII_88E1318S_PHY_LED_TCR_FORCE_INT,
+                       MII_88E1318S_PHY_LED_TCR_INTn_ENABLE |
+                       MII_88E1318S_PHY_LED_TCR_INT_ACTIVE_LOW);
+               if (err < 0)
+                       return err;
+       }
+
+       return m88e1121_config_init(phydev);
+}
+
 static int m88e1510_config_init(struct phy_device *phydev)
 {
        int err;
@@ -870,7 +886,7 @@ static int m88e1510_config_init(struct phy_device *phydev)
                phydev->advertising &= ~pause;
        }
 
-       return m88e1121_config_init(phydev);
+       return m88e1318_config_init(phydev);
 }
 
 static int m88e1118_config_aneg(struct phy_device *phydev)
@@ -2086,7 +2102,7 @@ static struct phy_driver marvell_drivers[] = {
                .features = PHY_GBIT_FEATURES,
                .flags = PHY_HAS_INTERRUPT,
                .probe = marvell_probe,
-               .config_init = &m88e1121_config_init,
+               .config_init = &m88e1318_config_init,
                .config_aneg = &m88e1318_config_aneg,
                .read_status = &marvell_read_status,
                .ack_interrupt = &marvell_ack_interrupt,
index 5782733959f0e0a7648d92ae1717065eb72b85fd..f4e93f5fc2043ebb29c5b36e94afe49ec0c7d7ba 100644 (file)
@@ -509,6 +509,10 @@ slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize)
                if(x < 0 || x > comp->rslot_limit)
                        goto bad;
 
+               /* Check if the cstate is initialized */
+               if (!comp->rstate[x].initialized)
+                       goto bad;
+
                comp->flags &=~ SLF_TOSS;
                comp->recv_current = x;
        } else {
@@ -673,6 +677,7 @@ slhc_remember(struct slcompress *comp, unsigned char *icp, int isize)
        if (cs->cs_tcp.doff > 5)
          memcpy(cs->cs_tcpopt, icp + ihl*4 + sizeof(struct tcphdr), (cs->cs_tcp.doff - 5) * 4);
        cs->cs_hsize = ihl*2 + cs->cs_tcp.doff*2;
+       cs->initialized = true;
        /* Put headers back on packet
         * Neither header checksum is recalculated
         */
index a1ba262f40ad0755d2cea34867851bb7acd8e2de..28583aa0c17df6fdbeb4f30625f258dcd69855d0 100644 (file)
@@ -743,8 +743,15 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
 
 static void tun_detach(struct tun_file *tfile, bool clean)
 {
+       struct tun_struct *tun;
+       struct net_device *dev;
+
        rtnl_lock();
+       tun = rtnl_dereference(tfile->tun);
+       dev = tun ? tun->dev : NULL;
        __tun_detach(tfile, clean);
+       if (dev)
+               netdev_state_change(dev);
        rtnl_unlock();
 }
 
@@ -2562,10 +2569,15 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
                        /* One or more queue has already been attached, no need
                         * to initialize the device again.
                         */
+                       netdev_state_change(dev);
                        return 0;
                }
-       }
-       else {
+
+               tun->flags = (tun->flags & ~TUN_FEATURES) |
+                             (ifr->ifr_flags & TUN_FEATURES);
+
+               netdev_state_change(dev);
+       } else {
                char *name;
                unsigned long flags = 0;
                int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ?
@@ -2642,6 +2654,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
                                     ~(NETIF_F_HW_VLAN_CTAG_TX |
                                       NETIF_F_HW_VLAN_STAG_TX);
 
+               tun->flags = (tun->flags & ~TUN_FEATURES) |
+                             (ifr->ifr_flags & TUN_FEATURES);
+
                INIT_LIST_HEAD(&tun->disabled);
                err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI);
                if (err < 0)
@@ -2656,9 +2671,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
        tun_debug(KERN_INFO, tun, "tun_set_iff\n");
 
-       tun->flags = (tun->flags & ~TUN_FEATURES) |
-               (ifr->ifr_flags & TUN_FEATURES);
-
        /* Make sure persistent devices do not get stuck in
         * xoff state.
         */
@@ -2805,6 +2817,9 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
        } else
                ret = -EINVAL;
 
+       if (ret >= 0)
+               netdev_state_change(tun->dev);
+
 unlock:
        rtnl_unlock();
        return ret;
@@ -2845,6 +2860,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
        unsigned int ifindex;
        int le;
        int ret;
+       bool do_notify = false;
 
        if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
            (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) {
@@ -2941,10 +2957,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                if (arg && !(tun->flags & IFF_PERSIST)) {
                        tun->flags |= IFF_PERSIST;
                        __module_get(THIS_MODULE);
+                       do_notify = true;
                }
                if (!arg && (tun->flags & IFF_PERSIST)) {
                        tun->flags &= ~IFF_PERSIST;
                        module_put(THIS_MODULE);
+                       do_notify = true;
                }
 
                tun_debug(KERN_INFO, tun, "persist %s\n",
@@ -2959,6 +2977,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                        break;
                }
                tun->owner = owner;
+               do_notify = true;
                tun_debug(KERN_INFO, tun, "owner set to %u\n",
                          from_kuid(&init_user_ns, tun->owner));
                break;
@@ -2971,6 +2990,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                        break;
                }
                tun->group = group;
+               do_notify = true;
                tun_debug(KERN_INFO, tun, "group set to %u\n",
                          from_kgid(&init_user_ns, tun->group));
                break;
@@ -3130,6 +3150,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                break;
        }
 
+       if (do_notify)
+               netdev_state_change(tun->dev);
+
 unlock:
        rtnl_unlock();
        if (tun)
index fff4b13eece29cd3d742309a0f7d57436ec22699..5c42cf81a08b2c250bb98c792012436d88df9562 100644 (file)
@@ -901,6 +901,12 @@ static const struct usb_device_id  products[] = {
                                      USB_CDC_SUBCLASS_ETHERNET,
                                      USB_CDC_PROTO_NONE),
        .driver_info = (unsigned long)&wwan_info,
+}, {
+       /* Cinterion AHS3 modem by GEMALTO */
+       USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0055, USB_CLASS_COMM,
+                                     USB_CDC_SUBCLASS_ETHERNET,
+                                     USB_CDC_PROTO_NONE),
+       .driver_info = (unsigned long)&wwan_info,
 }, {
        USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET,
                        USB_CDC_PROTO_NONE),
index aff105f5f58c1c75dc8e4b63b46baf77e854bc31..0867f72758522377ccbc38b649ad8aa4cb489b08 100644 (file)
@@ -928,7 +928,8 @@ static int lan78xx_read_otp(struct lan78xx_net *dev, u32 offset,
                        offset += 0x100;
                else
                        ret = -EINVAL;
-               ret = lan78xx_read_raw_otp(dev, offset, length, data);
+               if (!ret)
+                       ret = lan78xx_read_raw_otp(dev, offset, length, data);
        }
 
        return ret;
@@ -2502,7 +2503,7 @@ static void lan78xx_init_stats(struct lan78xx_net *dev)
        dev->stats.rollover_max.eee_tx_lpi_transitions = 0xFFFFFFFF;
        dev->stats.rollover_max.eee_tx_lpi_time = 0xFFFFFFFF;
 
-       lan78xx_defer_kevent(dev, EVENT_STAT_UPDATE);
+       set_bit(EVENT_STAT_UPDATE, &dev->flags);
 }
 
 static int lan78xx_open(struct net_device *net)
@@ -2514,10 +2515,6 @@ static int lan78xx_open(struct net_device *net)
        if (ret < 0)
                goto out;
 
-       ret = lan78xx_reset(dev);
-       if (ret < 0)
-               goto done;
-
        phy_start(net->phydev);
 
        netif_dbg(dev, ifup, dev->net, "phy initialised successfully");
index 6afe896e5cb84f8833c69787cae2dbdaaa8de1e9..96d26cfae90bd2acf55854f4818f11d4b14f2f5a 100644 (file)
@@ -253,7 +253,7 @@ static inline void hwsim_clear_chanctx_magic(struct ieee80211_chanctx_conf *c)
 
 static unsigned int hwsim_net_id;
 
-static struct ida hwsim_netgroup_ida = IDA_INIT;
+static DEFINE_IDA(hwsim_netgroup_ida);
 
 struct hwsim_net {
        int netgroup;
index a65f2e1d9f531d35104815583579e6a002c17949..85997184e04734d09538176a2a0798145e1435b6 100644 (file)
@@ -20,7 +20,7 @@ if LIBNVDIMM
 config BLK_DEV_PMEM
        tristate "PMEM: Persistent memory block device support"
        default LIBNVDIMM
-       select DAX
+       select DAX_DRIVER
        select ND_BTT if BTT
        select ND_PFN if NVDIMM_PFN
        help
@@ -102,4 +102,15 @@ config NVDIMM_DAX
 
          Select Y if unsure
 
+config OF_PMEM
+       # FIXME: make tristate once OF_NUMA dependency removed
+       bool "Device-tree support for persistent memory regions"
+       depends on OF
+       default LIBNVDIMM
+       help
+         Allows regions of persistent memory to be described in the
+         device-tree.
+
+         Select Y if unsure.
+
 endif
index 70d5f3ad99091cfe9999e133e3e6c6369caf35b3..e8847045dac006e7fe884f6306bbc40c16a5512b 100644 (file)
@@ -4,6 +4,7 @@ obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
 obj-$(CONFIG_ND_BTT) += nd_btt.o
 obj-$(CONFIG_ND_BLK) += nd_blk.o
 obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
+obj-$(CONFIG_OF_PMEM) += of_pmem.o
 
 nd_pmem-y := pmem.o
 
index d58925295aa797a60098b3076056c08714e11bc3..795ad4ff35cafdb91ac7c9629aa63ad22a87f1f9 100644 (file)
@@ -26,7 +26,7 @@ static void nd_btt_release(struct device *dev)
        struct nd_region *nd_region = to_nd_region(dev->parent);
        struct nd_btt *nd_btt = to_nd_btt(dev);
 
-       dev_dbg(dev, "%s\n", __func__);
+       dev_dbg(dev, "trace\n");
        nd_detach_ndns(&nd_btt->dev, &nd_btt->ndns);
        ida_simple_remove(&nd_region->btt_ida, nd_btt->id);
        kfree(nd_btt->uuid);
@@ -74,8 +74,8 @@ static ssize_t sector_size_store(struct device *dev,
        nvdimm_bus_lock(dev);
        rc = nd_size_select_store(dev, buf, &nd_btt->lbasize,
                        btt_lbasize_supported);
-       dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-                       rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+       dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+                       buf[len - 1] == '\n' ? "" : "\n");
        nvdimm_bus_unlock(dev);
        device_unlock(dev);
 
@@ -101,8 +101,8 @@ static ssize_t uuid_store(struct device *dev,
 
        device_lock(dev);
        rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len);
-       dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-                       rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+       dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+                       buf[len - 1] == '\n' ? "" : "\n");
        device_unlock(dev);
 
        return rc ? rc : len;
@@ -131,8 +131,8 @@ static ssize_t namespace_store(struct device *dev,
        device_lock(dev);
        nvdimm_bus_lock(dev);
        rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len);
-       dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-                       rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+       dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+                       buf[len - 1] == '\n' ? "" : "\n");
        nvdimm_bus_unlock(dev);
        device_unlock(dev);
 
@@ -206,8 +206,8 @@ static struct device *__nd_btt_create(struct nd_region *nd_region,
        dev->groups = nd_btt_attribute_groups;
        device_initialize(&nd_btt->dev);
        if (ndns && !__nd_attach_ndns(&nd_btt->dev, ndns, &nd_btt->ndns)) {
-               dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
-                               __func__, dev_name(ndns->claim));
+               dev_dbg(&ndns->dev, "failed, already claimed by %s\n",
+                               dev_name(ndns->claim));
                put_device(dev);
                return NULL;
        }
@@ -346,8 +346,7 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns)
                return -ENOMEM;
        btt_sb = devm_kzalloc(dev, sizeof(*btt_sb), GFP_KERNEL);
        rc = __nd_btt_probe(to_nd_btt(btt_dev), ndns, btt_sb);
-       dev_dbg(dev, "%s: btt: %s\n", __func__,
-                       rc == 0 ? dev_name(btt_dev) : "<none>");
+       dev_dbg(dev, "btt: %s\n", rc == 0 ? dev_name(btt_dev) : "<none>");
        if (rc < 0) {
                struct nd_btt *nd_btt = to_nd_btt(btt_dev);
 
index 78eabc3a1ab1337b68aef7b427c1ee59f570438c..a64023690cadeec34e251c38efeb4b036f0a91ac 100644 (file)
@@ -358,6 +358,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
        nvdimm_bus->dev.release = nvdimm_bus_release;
        nvdimm_bus->dev.groups = nd_desc->attr_groups;
        nvdimm_bus->dev.bus = &nvdimm_bus_type;
+       nvdimm_bus->dev.of_node = nd_desc->of_node;
        dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
        rc = device_register(&nvdimm_bus->dev);
        if (rc) {
@@ -984,8 +985,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 
        if (cmd == ND_CMD_CALL) {
                func = pkg.nd_command;
-               dev_dbg(dev, "%s:%s, idx: %llu, in: %u, out: %u, len %llu\n",
-                               __func__, dimm_name, pkg.nd_command,
+               dev_dbg(dev, "%s, idx: %llu, in: %u, out: %u, len %llu\n",
+                               dimm_name, pkg.nd_command,
                                in_len, out_len, buf_len);
        }
 
@@ -996,8 +997,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
                u32 copy;
 
                if (out_size == UINT_MAX) {
-                       dev_dbg(dev, "%s:%s unknown output size cmd: %s field: %d\n",
-                                       __func__, dimm_name, cmd_name, i);
+                       dev_dbg(dev, "%s unknown output size cmd: %s field: %d\n",
+                                       dimm_name, cmd_name, i);
                        return -EFAULT;
                }
                if (out_len < sizeof(out_env))
@@ -1012,9 +1013,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 
        buf_len = (u64) out_len + (u64) in_len;
        if (buf_len > ND_IOCTL_MAX_BUFLEN) {
-               dev_dbg(dev, "%s:%s cmd: %s buf_len: %llu > %d\n", __func__,
-                               dimm_name, cmd_name, buf_len,
-                               ND_IOCTL_MAX_BUFLEN);
+               dev_dbg(dev, "%s cmd: %s buf_len: %llu > %d\n", dimm_name,
+                               cmd_name, buf_len, ND_IOCTL_MAX_BUFLEN);
                return -EINVAL;
        }
 
index b2fc29b8279b1e96682cbb9f29a81a84107bba74..30852270484fb3a2138ecf58df8b0d1ad16da4ba 100644 (file)
@@ -148,7 +148,7 @@ ssize_t nd_namespace_store(struct device *dev,
        char *name;
 
        if (dev->driver) {
-               dev_dbg(dev, "%s: -EBUSY\n", __func__);
+               dev_dbg(dev, "namespace already active\n");
                return -EBUSY;
        }
 
index 1dc527660637e5c63d9a19aad3e978b998e842ff..acce050856a800759466f385f2b883cc7fec3d79 100644 (file)
@@ -134,7 +134,7 @@ static void nvdimm_map_release(struct kref *kref)
        nvdimm_map = container_of(kref, struct nvdimm_map, kref);
        nvdimm_bus = nvdimm_map->nvdimm_bus;
 
-       dev_dbg(&nvdimm_bus->dev, "%s: %pa\n", __func__, &nvdimm_map->offset);
+       dev_dbg(&nvdimm_bus->dev, "%pa\n", &nvdimm_map->offset);
        list_del(&nvdimm_map->list);
        if (nvdimm_map->flags)
                memunmap(nvdimm_map->mem);
@@ -230,8 +230,8 @@ static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf,
 
        for (i = 0; i < 16; i++) {
                if (!isxdigit(str[0]) || !isxdigit(str[1])) {
-                       dev_dbg(dev, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n",
-                                       __func__, i, str - buf, str[0],
+                       dev_dbg(dev, "pos: %d buf[%zd]: %c buf[%zd]: %c\n",
+                                       i, str - buf, str[0],
                                        str + 1 - buf, str[1]);
                        return -EINVAL;
                }
index 1bf2bd318371e967fd401c6d6777834d53adcdac..0453f49dc70814f35d2e0988f46304777f3e2559 100644 (file)
@@ -24,7 +24,7 @@ static void nd_dax_release(struct device *dev)
        struct nd_dax *nd_dax = to_nd_dax(dev);
        struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
 
-       dev_dbg(dev, "%s\n", __func__);
+       dev_dbg(dev, "trace\n");
        nd_detach_ndns(dev, &nd_pfn->ndns);
        ida_simple_remove(&nd_region->dax_ida, nd_pfn->id);
        kfree(nd_pfn->uuid);
@@ -129,8 +129,7 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns)
        pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
        nd_pfn->pfn_sb = pfn_sb;
        rc = nd_pfn_validate(nd_pfn, DAX_SIG);
-       dev_dbg(dev, "%s: dax: %s\n", __func__,
-                       rc == 0 ? dev_name(dax_dev) : "<none>");
+       dev_dbg(dev, "dax: %s\n", rc == 0 ? dev_name(dax_dev) : "<none>");
        if (rc < 0) {
                nd_detach_ndns(dax_dev, &nd_pfn->ndns);
                put_device(dax_dev);
index f8913b8124b62ff295f001c09b18d5acd0a299ed..233907889f96a36d6a3b87fc79cf82cabf602def 100644 (file)
@@ -67,9 +67,11 @@ static int nvdimm_probe(struct device *dev)
        ndd->ns_next = nd_label_next_nsindex(ndd->ns_current);
        nd_label_copy(ndd, to_next_namespace_index(ndd),
                        to_current_namespace_index(ndd));
-       rc = nd_label_reserve_dpa(ndd);
-       if (ndd->ns_current >= 0)
-               nvdimm_set_aliasing(dev);
+       if (ndd->ns_current >= 0) {
+               rc = nd_label_reserve_dpa(ndd);
+               if (rc == 0)
+                       nvdimm_set_aliasing(dev);
+       }
        nvdimm_clear_locked(dev);
        nvdimm_bus_unlock(dev);
 
index 097794d9f78637fd787f86a07060f8653fa67e4f..e00d45522b80e80fb4de68a8c506a3c8e68e6548 100644 (file)
@@ -131,7 +131,7 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
                }
                memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length);
        }
-       dev_dbg(ndd->dev, "%s: len: %zu rc: %d\n", __func__, offset, rc);
+       dev_dbg(ndd->dev, "len: %zu rc: %d\n", offset, rc);
        kfree(cmd);
 
        return rc;
@@ -266,8 +266,7 @@ void nvdimm_drvdata_release(struct kref *kref)
        struct device *dev = ndd->dev;
        struct resource *res, *_r;
 
-       dev_dbg(dev, "%s\n", __func__);
-
+       dev_dbg(dev, "trace\n");
        nvdimm_bus_lock(dev);
        for_each_dpa_resource_safe(ndd, res, _r)
                nvdimm_free_dpa(ndd, res);
@@ -660,7 +659,7 @@ int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count)
        nd_synchronize();
 
        device_for_each_child(&nvdimm_bus->dev, &count, count_dimms);
-       dev_dbg(&nvdimm_bus->dev, "%s: count: %d\n", __func__, count);
+       dev_dbg(&nvdimm_bus->dev, "count: %d\n", count);
        if (count != dimm_count)
                return -ENXIO;
        return 0;
index de66c02f61409e4693119752d3948568e18726df..1d28cd656536f4c086967faf97a277ec5d2c9ce8 100644 (file)
@@ -45,9 +45,27 @@ unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd)
        return ndd->nslabel_size;
 }
 
+static size_t __sizeof_namespace_index(u32 nslot)
+{
+       return ALIGN(sizeof(struct nd_namespace_index) + DIV_ROUND_UP(nslot, 8),
+                       NSINDEX_ALIGN);
+}
+
+static int __nvdimm_num_label_slots(struct nvdimm_drvdata *ndd,
+               size_t index_size)
+{
+       return (ndd->nsarea.config_size - index_size * 2) /
+                       sizeof_namespace_label(ndd);
+}
+
 int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd)
 {
-       return ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1);
+       u32 tmp_nslot, n;
+
+       tmp_nslot = ndd->nsarea.config_size / sizeof_namespace_label(ndd);
+       n = __sizeof_namespace_index(tmp_nslot) / NSINDEX_ALIGN;
+
+       return __nvdimm_num_label_slots(ndd, NSINDEX_ALIGN * n);
 }
 
 size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
@@ -55,18 +73,14 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
        u32 nslot, space, size;
 
        /*
-        * The minimum index space is 512 bytes, with that amount of
-        * index we can describe ~1400 labels which is less than a byte
-        * of overhead per label.  Round up to a byte of overhead per
-        * label and determine the size of the index region.  Yes, this
-        * starts to waste space at larger config_sizes, but it's
-        * unlikely we'll ever see anything but 128K.
+        * Per UEFI 2.7, the minimum size of the Label Storage Area is large
+        * enough to hold 2 index blocks and 2 labels.  The minimum index
+        * block size is 256 bytes, and the minimum label size is 256 bytes.
         */
        nslot = nvdimm_num_label_slots(ndd);
        space = ndd->nsarea.config_size - nslot * sizeof_namespace_label(ndd);
-       size = ALIGN(sizeof(struct nd_namespace_index) + DIV_ROUND_UP(nslot, 8),
-                       NSINDEX_ALIGN) * 2;
-       if (size <= space)
+       size = __sizeof_namespace_index(nslot) * 2;
+       if (size <= space && nslot >= 2)
                return size / 2;
 
        dev_err(ndd->dev, "label area (%d) too small to host (%d byte) labels\n",
@@ -121,8 +135,7 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 
                memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN);
                if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) {
-                       dev_dbg(dev, "%s: nsindex%d signature invalid\n",
-                                       __func__, i);
+                       dev_dbg(dev, "nsindex%d signature invalid\n", i);
                        continue;
                }
 
@@ -135,8 +148,8 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
                        labelsize = 128;
 
                if (labelsize != sizeof_namespace_label(ndd)) {
-                       dev_dbg(dev, "%s: nsindex%d labelsize %d invalid\n",
-                                       __func__, i, nsindex[i]->labelsize);
+                       dev_dbg(dev, "nsindex%d labelsize %d invalid\n",
+                                       i, nsindex[i]->labelsize);
                        continue;
                }
 
@@ -145,30 +158,28 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
                sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1);
                nsindex[i]->checksum = __cpu_to_le64(sum_save);
                if (sum != sum_save) {
-                       dev_dbg(dev, "%s: nsindex%d checksum invalid\n",
-                                       __func__, i);
+                       dev_dbg(dev, "nsindex%d checksum invalid\n", i);
                        continue;
                }
 
                seq = __le32_to_cpu(nsindex[i]->seq);
                if ((seq & NSINDEX_SEQ_MASK) == 0) {
-                       dev_dbg(dev, "%s: nsindex%d sequence: %#x invalid\n",
-                                       __func__, i, seq);
+                       dev_dbg(dev, "nsindex%d sequence: %#x invalid\n", i, seq);
                        continue;
                }
 
                /* sanity check the index against expected values */
                if (__le64_to_cpu(nsindex[i]->myoff)
                                != i * sizeof_namespace_index(ndd)) {
-                       dev_dbg(dev, "%s: nsindex%d myoff: %#llx invalid\n",
-                                       __func__, i, (unsigned long long)
+                       dev_dbg(dev, "nsindex%d myoff: %#llx invalid\n",
+                                       i, (unsigned long long)
                                        __le64_to_cpu(nsindex[i]->myoff));
                        continue;
                }
                if (__le64_to_cpu(nsindex[i]->otheroff)
                                != (!i) * sizeof_namespace_index(ndd)) {
-                       dev_dbg(dev, "%s: nsindex%d otheroff: %#llx invalid\n",
-                                       __func__, i, (unsigned long long)
+                       dev_dbg(dev, "nsindex%d otheroff: %#llx invalid\n",
+                                       i, (unsigned long long)
                                        __le64_to_cpu(nsindex[i]->otheroff));
                        continue;
                }
@@ -176,8 +187,7 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
                size = __le64_to_cpu(nsindex[i]->mysize);
                if (size > sizeof_namespace_index(ndd)
                                || size < sizeof(struct nd_namespace_index)) {
-                       dev_dbg(dev, "%s: nsindex%d mysize: %#llx invalid\n",
-                                       __func__, i, size);
+                       dev_dbg(dev, "nsindex%d mysize: %#llx invalid\n", i, size);
                        continue;
                }
 
@@ -185,9 +195,8 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
                if (nslot * sizeof_namespace_label(ndd)
                                + 2 * sizeof_namespace_index(ndd)
                                > ndd->nsarea.config_size) {
-                       dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n",
-                                       __func__, i, nslot,
-                                       ndd->nsarea.config_size);
+                       dev_dbg(dev, "nsindex%d nslot: %u invalid, config_size: %#x\n",
+                                       i, nslot, ndd->nsarea.config_size);
                        continue;
                }
                valid[i] = true;
@@ -356,8 +365,8 @@ static bool slot_valid(struct nvdimm_drvdata *ndd,
                sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
                nd_label->checksum = __cpu_to_le64(sum_save);
                if (sum != sum_save) {
-                       dev_dbg(ndd->dev, "%s fail checksum. slot: %d expect: %#llx\n",
-                               __func__, slot, sum);
+                       dev_dbg(ndd->dev, "fail checksum. slot: %d expect: %#llx\n",
+                               slot, sum);
                        return false;
                }
        }
@@ -422,8 +431,8 @@ int nd_label_active_count(struct nvdimm_drvdata *ndd)
                        u64 dpa = __le64_to_cpu(nd_label->dpa);
 
                        dev_dbg(ndd->dev,
-                               "%s: slot%d invalid slot: %d dpa: %llx size: %llx\n",
-                                       __func__, slot, label_slot, dpa, size);
+                               "slot%d invalid slot: %d dpa: %llx size: %llx\n",
+                                       slot, label_slot, dpa, size);
                        continue;
                }
                count++;
@@ -650,7 +659,7 @@ static int __pmem_label_update(struct nd_region *nd_region,
        slot = nd_label_alloc_slot(ndd);
        if (slot == UINT_MAX)
                return -ENXIO;
-       dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot);
+       dev_dbg(ndd->dev, "allocated: %d\n", slot);
 
        nd_label = to_label(ndd, slot);
        memset(nd_label, 0, sizeof_namespace_label(ndd));
@@ -678,7 +687,7 @@ static int __pmem_label_update(struct nd_region *nd_region,
                sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
                nd_label->checksum = __cpu_to_le64(sum);
        }
-       nd_dbg_dpa(nd_region, ndd, res, "%s\n", __func__);
+       nd_dbg_dpa(nd_region, ndd, res, "\n");
 
        /* update label */
        offset = nd_label_offset(ndd, nd_label);
@@ -700,7 +709,7 @@ static int __pmem_label_update(struct nd_region *nd_region,
                break;
        }
        if (victim) {
-               dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+               dev_dbg(ndd->dev, "free: %d\n", slot);
                slot = to_slot(ndd, victim->label);
                nd_label_free_slot(ndd, slot);
                victim->label = NULL;
@@ -868,7 +877,7 @@ static int __blk_label_update(struct nd_region *nd_region,
                slot = nd_label_alloc_slot(ndd);
                if (slot == UINT_MAX)
                        goto abort;
-               dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot);
+               dev_dbg(ndd->dev, "allocated: %d\n", slot);
 
                nd_label = to_label(ndd, slot);
                memset(nd_label, 0, sizeof_namespace_label(ndd));
@@ -928,7 +937,7 @@ static int __blk_label_update(struct nd_region *nd_region,
 
        /* free up now unused slots in the new index */
        for_each_set_bit(slot, victim_map, victim_map ? nslot : 0) {
-               dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+               dev_dbg(ndd->dev, "free: %d\n", slot);
                nd_label_free_slot(ndd, slot);
        }
 
@@ -1092,7 +1101,7 @@ static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
                active--;
                slot = to_slot(ndd, nd_label);
                nd_label_free_slot(ndd, slot);
-               dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+               dev_dbg(ndd->dev, "free: %d\n", slot);
                list_move_tail(&label_ent->list, &list);
                label_ent->label = NULL;
        }
@@ -1100,7 +1109,7 @@ static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
 
        if (active == 0) {
                nd_mapping_free_labels(nd_mapping);
-               dev_dbg(ndd->dev, "%s: no more active labels\n", __func__);
+               dev_dbg(ndd->dev, "no more active labels\n");
        }
        mutex_unlock(&nd_mapping->lock);
 
index 1ebf4d3d01bac2ec81db5bf031ea068f1c977f72..18bbe183b3a9bde29efe0d8e4940975960d012fd 100644 (file)
@@ -33,7 +33,7 @@ enum {
        BTTINFO_UUID_LEN = 16,
        BTTINFO_FLAG_ERROR = 0x1,    /* error state (read-only) */
        BTTINFO_MAJOR_VERSION = 1,
-       ND_LABEL_MIN_SIZE = 512 * 129, /* see sizeof_namespace_index() */
+       ND_LABEL_MIN_SIZE = 256 * 4, /* see sizeof_namespace_index() */
        ND_LABEL_ID_SIZE = 50,
        ND_NSINDEX_INIT = 0x1,
 };
index 658ada497be0a43f0a9df0d4a71aeaaa8df13bdb..28afdd668905019472d802be06695bfc06897b48 100644 (file)
@@ -421,7 +421,7 @@ static ssize_t alt_name_store(struct device *dev,
        rc = __alt_name_store(dev, buf, len);
        if (rc >= 0)
                rc = nd_namespace_label_update(nd_region, dev);
-       dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc);
+       dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
        nvdimm_bus_unlock(dev);
        device_unlock(dev);
 
@@ -1007,7 +1007,7 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
        if (uuid_not_set(uuid, dev, __func__))
                return -ENXIO;
        if (nd_region->ndr_mappings == 0) {
-               dev_dbg(dev, "%s: not associated with dimm(s)\n", __func__);
+               dev_dbg(dev, "not associated with dimm(s)\n");
                return -ENXIO;
        }
 
@@ -1105,8 +1105,7 @@ static ssize_t size_store(struct device *dev,
                *uuid = NULL;
        }
 
-       dev_dbg(dev, "%s: %llx %s (%d)\n", __func__, val, rc < 0
-                       ? "fail" : "success", rc);
+       dev_dbg(dev, "%llx %s (%d)\n", val, rc < 0 ? "fail" : "success", rc);
 
        nvdimm_bus_unlock(dev);
        device_unlock(dev);
@@ -1270,8 +1269,8 @@ static ssize_t uuid_store(struct device *dev,
                rc = nd_namespace_label_update(nd_region, dev);
        else
                kfree(uuid);
-       dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-                       rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+       dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+                       buf[len - 1] == '\n' ? "" : "\n");
        nvdimm_bus_unlock(dev);
        device_unlock(dev);
 
@@ -1355,9 +1354,8 @@ static ssize_t sector_size_store(struct device *dev,
                rc = nd_size_select_store(dev, buf, lbasize, supported);
        if (rc >= 0)
                rc = nd_namespace_label_update(nd_region, dev);
-       dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__,
-                       rc, rc < 0 ? "tried" : "wrote", buf,
-                       buf[len - 1] == '\n' ? "" : "\n");
+       dev_dbg(dev, "result: %zd %s: %s%s", rc, rc < 0 ? "tried" : "wrote",
+                       buf, buf[len - 1] == '\n' ? "" : "\n");
        nvdimm_bus_unlock(dev);
        device_unlock(dev);
 
@@ -1519,7 +1517,7 @@ static ssize_t holder_class_store(struct device *dev,
        rc = __holder_class_store(dev, buf);
        if (rc >= 0)
                rc = nd_namespace_label_update(nd_region, dev);
-       dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc);
+       dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
        nvdimm_bus_unlock(dev);
        device_unlock(dev);
 
@@ -1717,8 +1715,7 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
                if (uuid_not_set(nsblk->uuid, &ndns->dev, __func__))
                        return ERR_PTR(-ENODEV);
                if (!nsblk->lbasize) {
-                       dev_dbg(&ndns->dev, "%s: sector size not set\n",
-                               __func__);
+                       dev_dbg(&ndns->dev, "sector size not set\n");
                        return ERR_PTR(-ENODEV);
                }
                if (!nd_namespace_blk_validate(nsblk))
@@ -1798,9 +1795,7 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
                        }
 
                        if (found_uuid) {
-                               dev_dbg(ndd->dev,
-                                               "%s duplicate entry for uuid\n",
-                                               __func__);
+                               dev_dbg(ndd->dev, "duplicate entry for uuid\n");
                                return false;
                        }
                        found_uuid = true;
@@ -1926,7 +1921,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
        }
 
        if (i < nd_region->ndr_mappings) {
-               struct nvdimm_drvdata *ndd = to_ndd(&nd_region->mapping[i]);
+               struct nvdimm *nvdimm = nd_region->mapping[i].nvdimm;
 
                /*
                 * Give up if we don't find an instance of a uuid at each
@@ -1934,7 +1929,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
                 * find a dimm with two instances of the same uuid.
                 */
                dev_err(&nd_region->dev, "%s missing label for %pUb\n",
-                               dev_name(ndd->dev), nd_label->uuid);
+                               nvdimm_name(nvdimm), nd_label->uuid);
                rc = -EINVAL;
                goto err;
        }
@@ -1994,14 +1989,13 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
        namespace_pmem_release(dev);
        switch (rc) {
        case -EINVAL:
-               dev_dbg(&nd_region->dev, "%s: invalid label(s)\n", __func__);
+               dev_dbg(&nd_region->dev, "invalid label(s)\n");
                break;
        case -ENODEV:
-               dev_dbg(&nd_region->dev, "%s: label not found\n", __func__);
+               dev_dbg(&nd_region->dev, "label not found\n");
                break;
        default:
-               dev_dbg(&nd_region->dev, "%s: unexpected err: %d\n",
-                               __func__, rc);
+               dev_dbg(&nd_region->dev, "unexpected err: %d\n", rc);
                break;
        }
        return ERR_PTR(rc);
@@ -2334,8 +2328,8 @@ static struct device **scan_labels(struct nd_region *nd_region)
 
        }
 
-       dev_dbg(&nd_region->dev, "%s: discovered %d %s namespace%s\n",
-                       __func__, count, is_nd_blk(&nd_region->dev)
+       dev_dbg(&nd_region->dev, "discovered %d %s namespace%s\n",
+                       count, is_nd_blk(&nd_region->dev)
                        ? "blk" : "pmem", count == 1 ? "" : "s");
 
        if (count == 0) {
@@ -2467,7 +2461,7 @@ static int init_active_labels(struct nd_region *nd_region)
                get_ndd(ndd);
 
                count = nd_label_active_count(ndd);
-               dev_dbg(ndd->dev, "%s: %d\n", __func__, count);
+               dev_dbg(ndd->dev, "count: %d\n", count);
                if (!count)
                        continue;
                for (j = 0; j < count; j++) {
index 184e070d50a2b62a131ac0a36054010d505b7d56..32e0364b48b9d70f2f218f53734a6abe1c18e9b0 100644 (file)
@@ -340,7 +340,6 @@ static inline struct device *nd_dax_create(struct nd_region *nd_region)
 }
 #endif
 
-struct nd_region *to_nd_region(struct device *dev);
 int nd_region_to_nstype(struct nd_region *nd_region);
 int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
 u64 nd_region_interleave_set_cookie(struct nd_region *nd_region,
diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
new file mode 100644 (file)
index 0000000..85013ba
--- /dev/null
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#define pr_fmt(fmt) "of_pmem: " fmt
+
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/libnvdimm.h>
+#include <linux/module.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+
+static const struct attribute_group *region_attr_groups[] = {
+       &nd_region_attribute_group,
+       &nd_device_attribute_group,
+       NULL,
+};
+
+static const struct attribute_group *bus_attr_groups[] = {
+       &nvdimm_bus_attribute_group,
+       NULL,
+};
+
+struct of_pmem_private {
+       struct nvdimm_bus_descriptor bus_desc;
+       struct nvdimm_bus *bus;
+};
+
+static int of_pmem_region_probe(struct platform_device *pdev)
+{
+       struct of_pmem_private *priv;
+       struct device_node *np;
+       struct nvdimm_bus *bus;
+       bool is_volatile;
+       int i;
+
+       np = dev_of_node(&pdev->dev);
+       if (!np)
+               return -ENXIO;
+
+       priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       priv->bus_desc.attr_groups = bus_attr_groups;
+       priv->bus_desc.provider_name = "of_pmem";
+       priv->bus_desc.module = THIS_MODULE;
+       priv->bus_desc.of_node = np;
+
+       priv->bus = bus = nvdimm_bus_register(&pdev->dev, &priv->bus_desc);
+       if (!bus) {
+               kfree(priv);
+               return -ENODEV;
+       }
+       platform_set_drvdata(pdev, priv);
+
+       is_volatile = !!of_find_property(np, "volatile", NULL);
+       dev_dbg(&pdev->dev, "Registering %s regions from %pOF\n",
+                       is_volatile ? "volatile" : "non-volatile",  np);
+
+       for (i = 0; i < pdev->num_resources; i++) {
+               struct nd_region_desc ndr_desc;
+               struct nd_region *region;
+
+               /*
+                * NB: libnvdimm copies the data from ndr_desc into it's own
+                * structures so passing a stack pointer is fine.
+                */
+               memset(&ndr_desc, 0, sizeof(ndr_desc));
+               ndr_desc.attr_groups = region_attr_groups;
+               ndr_desc.numa_node = of_node_to_nid(np);
+               ndr_desc.res = &pdev->resource[i];
+               ndr_desc.of_node = np;
+               set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+
+               if (is_volatile)
+                       region = nvdimm_volatile_region_create(bus, &ndr_desc);
+               else
+                       region = nvdimm_pmem_region_create(bus, &ndr_desc);
+
+               if (!region)
+                       dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n",
+                                       ndr_desc.res, np);
+               else
+                       dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n",
+                                       ndr_desc.res, np);
+       }
+
+       return 0;
+}
+
+static int of_pmem_region_remove(struct platform_device *pdev)
+{
+       struct of_pmem_private *priv = platform_get_drvdata(pdev);
+
+       nvdimm_bus_unregister(priv->bus);
+       kfree(priv);
+
+       return 0;
+}
+
+static const struct of_device_id of_pmem_region_match[] = {
+       { .compatible = "pmem-region" },
+       { },
+};
+
+static struct platform_driver of_pmem_region_driver = {
+       .probe = of_pmem_region_probe,
+       .remove = of_pmem_region_remove,
+       .driver = {
+               .name = "of_pmem",
+               .owner = THIS_MODULE,
+               .of_match_table = of_pmem_region_match,
+       },
+};
+
+module_platform_driver(of_pmem_region_driver);
+MODULE_DEVICE_TABLE(of, of_pmem_region_match);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("IBM Corporation");
index 2f4d18752c9772b08aa4d8e22a023fc526a7248b..30b08791597d7f76175fec0f4432d8fda1b83109 100644 (file)
@@ -27,7 +27,7 @@ static void nd_pfn_release(struct device *dev)
        struct nd_region *nd_region = to_nd_region(dev->parent);
        struct nd_pfn *nd_pfn = to_nd_pfn(dev);
 
-       dev_dbg(dev, "%s\n", __func__);
+       dev_dbg(dev, "trace\n");
        nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns);
        ida_simple_remove(&nd_region->pfn_ida, nd_pfn->id);
        kfree(nd_pfn->uuid);
@@ -94,8 +94,8 @@ static ssize_t mode_store(struct device *dev,
                else
                        rc = -EINVAL;
        }
-       dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-                       rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+       dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+                       buf[len - 1] == '\n' ? "" : "\n");
        nvdimm_bus_unlock(dev);
        device_unlock(dev);
 
@@ -144,8 +144,8 @@ static ssize_t align_store(struct device *dev,
        nvdimm_bus_lock(dev);
        rc = nd_size_select_store(dev, buf, &nd_pfn->align,
                        nd_pfn_supported_alignments());
-       dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-                       rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+       dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+                       buf[len - 1] == '\n' ? "" : "\n");
        nvdimm_bus_unlock(dev);
        device_unlock(dev);
 
@@ -171,8 +171,8 @@ static ssize_t uuid_store(struct device *dev,
 
        device_lock(dev);
        rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len);
-       dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-                       rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+       dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+                       buf[len - 1] == '\n' ? "" : "\n");
        device_unlock(dev);
 
        return rc ? rc : len;
@@ -201,8 +201,8 @@ static ssize_t namespace_store(struct device *dev,
        device_lock(dev);
        nvdimm_bus_lock(dev);
        rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len);
-       dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-                       rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+       dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+                       buf[len - 1] == '\n' ? "" : "\n");
        nvdimm_bus_unlock(dev);
        device_unlock(dev);
 
@@ -314,8 +314,8 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
        dev = &nd_pfn->dev;
        device_initialize(&nd_pfn->dev);
        if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
-               dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
-                               __func__, dev_name(ndns->claim));
+               dev_dbg(&ndns->dev, "failed, already claimed by %s\n",
+                               dev_name(ndns->claim));
                put_device(dev);
                return NULL;
        }
@@ -510,8 +510,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns)
        nd_pfn = to_nd_pfn(pfn_dev);
        nd_pfn->pfn_sb = pfn_sb;
        rc = nd_pfn_validate(nd_pfn, PFN_SIG);
-       dev_dbg(dev, "%s: pfn: %s\n", __func__,
-                       rc == 0 ? dev_name(pfn_dev) : "<none>");
+       dev_dbg(dev, "pfn: %s\n", rc == 0 ? dev_name(pfn_dev) : "<none>");
        if (rc < 0) {
                nd_detach_ndns(pfn_dev, &nd_pfn->ndns);
                put_device(pfn_dev);
index 5a96d30c294a4b0b77ba4c9aa4a14db26b852f51..9d714926ecf525e90e6f15892da8c931cdb4151c 100644 (file)
@@ -66,7 +66,7 @@ static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
                rc = BLK_STS_IOERR;
        if (cleared > 0 && cleared / 512) {
                cleared /= 512;
-               dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__,
+               dev_dbg(dev, "%#llx clear %ld sector%s\n",
                                (unsigned long long) sector, cleared,
                                cleared > 1 ? "s" : "");
                badblocks_clear(&pmem->bb, sector, cleared);
@@ -547,17 +547,7 @@ static struct nd_device_driver nd_pmem_driver = {
        .type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM,
 };
 
-static int __init pmem_init(void)
-{
-       return nd_driver_register(&nd_pmem_driver);
-}
-module_init(pmem_init);
-
-static void pmem_exit(void)
-{
-       driver_unregister(&nd_pmem_driver.drv);
-}
-module_exit(pmem_exit);
+module_nd_driver(nd_pmem_driver);
 
 MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
 MODULE_LICENSE("GPL v2");
index 034f0a07d627ea719b447d6f3c64b31f86c902e9..b9ca0033cc9996b295fe28e96369531ef87ab20a 100644 (file)
@@ -27,10 +27,10 @@ static int nd_region_probe(struct device *dev)
        if (nd_region->num_lanes > num_online_cpus()
                        && nd_region->num_lanes < num_possible_cpus()
                        && !test_and_set_bit(0, &once)) {
-               dev_info(dev, "online cpus (%d) < concurrent i/o lanes (%d) < possible cpus (%d)\n",
+               dev_dbg(dev, "online cpus (%d) < concurrent i/o lanes (%d) < possible cpus (%d)\n",
                                num_online_cpus(), nd_region->num_lanes,
                                num_possible_cpus());
-               dev_info(dev, "setting nr_cpus=%d may yield better libnvdimm device performance\n",
+               dev_dbg(dev, "setting nr_cpus=%d may yield better libnvdimm device performance\n",
                                nd_region->num_lanes);
        }
 
index 1593e1806b16c6b413ea1e5555987b88c6740286..a612be6f019d49b1ce08da99e981493b1f9e22d7 100644 (file)
@@ -182,6 +182,14 @@ struct nd_region *to_nd_region(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(to_nd_region);
 
+struct device *nd_region_dev(struct nd_region *nd_region)
+{
+       if (!nd_region)
+               return NULL;
+       return &nd_region->dev;
+}
+EXPORT_SYMBOL_GPL(nd_region_dev);
+
 struct nd_blk_region *to_nd_blk_region(struct device *dev)
 {
        struct nd_region *nd_region = to_nd_region(dev);
@@ -1014,6 +1022,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
        dev->parent = &nvdimm_bus->dev;
        dev->type = dev_type;
        dev->groups = ndr_desc->attr_groups;
+       dev->of_node = ndr_desc->of_node;
        nd_region->ndr_size = resource_size(ndr_desc->res);
        nd_region->ndr_start = ndr_desc->res->start;
        nd_device_register(dev);
index 02c5984ab09b038023557eecc40bc8b39a3f4ba6..6bb37c18292a855c90d80c9f162dcb6ae4cfeed5 100644 (file)
@@ -295,7 +295,7 @@ static void __init of_unittest_printf(void)
                return;
        }
 
-       num_to_str(phandle_str, sizeof(phandle_str), np->phandle);
+       num_to_str(phandle_str, sizeof(phandle_str), np->phandle, 0);
 
        of_unittest_printf_one(np, "%pOF",  full_name);
        of_unittest_printf_one(np, "%pOFf", full_name);
index 313cf8ad77bf68ce32dd0affe4acfda1b97f9d63..ea9e7f4479cad668dc261ee4386dee272f729888 100644 (file)
@@ -93,9 +93,11 @@ struct mlxreg_hotplug_priv_data {
        bool after_probe;
 };
 
-static int mlxreg_hotplug_device_create(struct device *dev,
+static int mlxreg_hotplug_device_create(struct mlxreg_hotplug_priv_data *priv,
                                        struct mlxreg_core_data *data)
 {
+       struct mlxreg_core_hotplug_platform_data *pdata;
+
        /*
         * Return if adapter number is negative. It could be in case hotplug
         * event is not associated with hotplug device.
@@ -103,19 +105,21 @@ static int mlxreg_hotplug_device_create(struct device *dev,
        if (data->hpdev.nr < 0)
                return 0;
 
-       data->hpdev.adapter = i2c_get_adapter(data->hpdev.nr);
+       pdata = dev_get_platdata(&priv->pdev->dev);
+       data->hpdev.adapter = i2c_get_adapter(data->hpdev.nr +
+                                             pdata->shift_nr);
        if (!data->hpdev.adapter) {
-               dev_err(dev, "Failed to get adapter for bus %d\n",
-                       data->hpdev.nr);
+               dev_err(priv->dev, "Failed to get adapter for bus %d\n",
+                       data->hpdev.nr + pdata->shift_nr);
                return -EFAULT;
        }
 
        data->hpdev.client = i2c_new_device(data->hpdev.adapter,
                                            data->hpdev.brdinfo);
        if (!data->hpdev.client) {
-               dev_err(dev, "Failed to create client %s at bus %d at addr 0x%02x\n",
-                       data->hpdev.brdinfo->type, data->hpdev.nr,
-                       data->hpdev.brdinfo->addr);
+               dev_err(priv->dev, "Failed to create client %s at bus %d at addr 0x%02x\n",
+                       data->hpdev.brdinfo->type, data->hpdev.nr +
+                       pdata->shift_nr, data->hpdev.brdinfo->addr);
 
                i2c_put_adapter(data->hpdev.adapter);
                data->hpdev.adapter = NULL;
@@ -270,10 +274,10 @@ mlxreg_hotplug_work_helper(struct mlxreg_hotplug_priv_data *priv,
                        if (item->inversed)
                                mlxreg_hotplug_device_destroy(data);
                        else
-                               mlxreg_hotplug_device_create(priv->dev, data);
+                               mlxreg_hotplug_device_create(priv, data);
                } else {
                        if (item->inversed)
-                               mlxreg_hotplug_device_create(priv->dev, data);
+                               mlxreg_hotplug_device_create(priv, data);
                        else
                                mlxreg_hotplug_device_destroy(data);
                }
@@ -319,7 +323,7 @@ mlxreg_hotplug_health_work_helper(struct mlxreg_hotplug_priv_data *priv,
                if (regval == MLXREG_HOTPLUG_HEALTH_MASK) {
                        if ((data->health_cntr++ == MLXREG_HOTPLUG_RST_CNTR) ||
                            !priv->after_probe) {
-                               mlxreg_hotplug_device_create(priv->dev, data);
+                               mlxreg_hotplug_device_create(priv, data);
                                data->attached = true;
                        }
                } else {
@@ -550,6 +554,7 @@ static int mlxreg_hotplug_probe(struct platform_device *pdev)
 {
        struct mlxreg_core_hotplug_platform_data *pdata;
        struct mlxreg_hotplug_priv_data *priv;
+       struct i2c_adapter *deferred_adap;
        int err;
 
        pdata = dev_get_platdata(&pdev->dev);
@@ -558,6 +563,12 @@ static int mlxreg_hotplug_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
+       /* Defer probing if the necessary adapter is not configured yet. */
+       deferred_adap = i2c_get_adapter(pdata->deferred_nr);
+       if (!deferred_adap)
+               return -EPROBE_DEFER;
+       i2c_put_adapter(deferred_adap);
+
        priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;
index ef016e46544a5a280c4c1d9d4d2d1509503139c8..39d06dd1f63a8c94710df2a716cfa6faf6a0aabb 100644 (file)
@@ -757,6 +757,8 @@ config TOPSTAR_LAPTOP
        depends on ACPI
        depends on INPUT
        select INPUT_SPARSEKMAP
+       select LEDS_CLASS
+       select NEW_LEDS
        ---help---
          This driver adds support for hotkeys found on Topstar laptops.
 
@@ -1174,6 +1176,7 @@ config INTEL_TELEMETRY
 
 config MLX_PLATFORM
        tristate "Mellanox Technologies platform support"
+       depends on I2C && REGMAP
        ---help---
          This option enables system support for the Mellanox Technologies
          platform. The Mellanox systems provide data center networking
index 2485c80a9fddb772c5085f4e2074a14981b26ba4..33fb2a20458a58fba2bee9ce79ad7571c184c7f3 100644 (file)
@@ -514,7 +514,7 @@ static int build_tokens_sysfs(struct platform_device *dev)
                continue;
 
 loop_fail_create_value:
-               kfree(value_name);
+               kfree(location_name);
                goto out_unwind_strings;
        }
        smbios_attribute_group.attrs = token_attrs;
@@ -525,7 +525,7 @@ loop_fail_create_value:
        return 0;
 
 out_unwind_strings:
-       for (i = i-1; i > 0; i--) {
+       while (i--) {
                kfree(token_location_attrs[i].attr.name);
                kfree(token_value_attrs[i].attr.name);
        }
index 2cfbd3fa5136002362bdc558c14d00e40ed56e22..cd95b6f3a06405835edadfc6ea92a5b46bab591f 100644 (file)
@@ -53,6 +53,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/acpi.h>
+#include <linux/bitops.h>
 #include <linux/dmi.h>
 #include <linux/backlight.h>
 #include <linux/fb.h>
 #include <linux/kfifo.h>
 #include <linux/leds.h>
 #include <linux/platform_device.h>
-#include <linux/slab.h>
 #include <acpi/video.h>
 
-#define FUJITSU_DRIVER_VERSION "0.6.0"
+#define FUJITSU_DRIVER_VERSION         "0.6.0"
 
-#define FUJITSU_LCD_N_LEVELS 8
+#define FUJITSU_LCD_N_LEVELS           8
 
 #define ACPI_FUJITSU_CLASS             "fujitsu"
 #define ACPI_FUJITSU_BL_HID            "FUJ02B1"
 #define ACPI_FUJITSU_LAPTOP_DRIVER_NAME        "Fujitsu laptop FUJ02E3 ACPI hotkeys driver"
 #define ACPI_FUJITSU_LAPTOP_DEVICE_NAME        "Fujitsu FUJ02E3"
 
-#define ACPI_FUJITSU_NOTIFY_CODE1     0x80
+#define ACPI_FUJITSU_NOTIFY_CODE       0x80
 
 /* FUNC interface - command values */
-#define FUNC_FLAGS     0x1000
-#define FUNC_LEDS      0x1001
-#define FUNC_BUTTONS   0x1002
-#define FUNC_BACKLIGHT  0x1004
+#define FUNC_FLAGS                     BIT(12)
+#define FUNC_LEDS                      (BIT(12) | BIT(0))
+#define FUNC_BUTTONS                   (BIT(12) | BIT(1))
+#define FUNC_BACKLIGHT                 (BIT(12) | BIT(2))
 
 /* FUNC interface - responses */
-#define UNSUPPORTED_CMD 0x80000000
+#define UNSUPPORTED_CMD                        0x80000000
 
 /* FUNC interface - status flags */
-#define FLAG_RFKILL    0x020
-#define FLAG_LID       0x100
-#define FLAG_DOCK      0x200
+#define FLAG_RFKILL                    BIT(5)
+#define FLAG_LID                       BIT(8)
+#define FLAG_DOCK                      BIT(9)
 
 /* FUNC interface - LED control */
-#define FUNC_LED_OFF   0x1
-#define FUNC_LED_ON    0x30001
-#define KEYBOARD_LAMPS 0x100
-#define LOGOLAMP_POWERON 0x2000
-#define LOGOLAMP_ALWAYS  0x4000
-#define RADIO_LED_ON   0x20
-#define ECO_LED        0x10000
-#define ECO_LED_ON     0x80000
-
-/* Hotkey details */
-#define KEY1_CODE      0x410   /* codes for the keys in the GIRB register */
-#define KEY2_CODE      0x411
-#define KEY3_CODE      0x412
-#define KEY4_CODE      0x413
-#define KEY5_CODE      0x420
-
-#define MAX_HOTKEY_RINGBUFFER_SIZE 100
-#define RINGBUFFERSIZE 40
+#define FUNC_LED_OFF                   BIT(0)
+#define FUNC_LED_ON                    (BIT(0) | BIT(16) | BIT(17))
+#define LOGOLAMP_POWERON               BIT(13)
+#define LOGOLAMP_ALWAYS                        BIT(14)
+#define KEYBOARD_LAMPS                 BIT(8)
+#define RADIO_LED_ON                   BIT(5)
+#define ECO_LED                                BIT(16)
+#define ECO_LED_ON                     BIT(19)
+
+/* FUNC interface - backlight power control */
+#define BACKLIGHT_PARAM_POWER          BIT(2)
+#define BACKLIGHT_OFF                  (BIT(0) | BIT(1))
+#define BACKLIGHT_ON                   0
+
+/* Scancodes read from the GIRB register */
+#define KEY1_CODE                      0x410
+#define KEY2_CODE                      0x411
+#define KEY3_CODE                      0x412
+#define KEY4_CODE                      0x413
+#define KEY5_CODE                      0x420
+
+/* Hotkey ringbuffer limits */
+#define MAX_HOTKEY_RINGBUFFER_SIZE     100
+#define RINGBUFFERSIZE                 40
+
+/* Module parameters */
+static int use_alt_lcd_levels = -1;
+static bool disable_brightness_adjust;
 
 /* Device controlling the backlight and associated keys */
 struct fujitsu_bl {
@@ -122,8 +132,6 @@ struct fujitsu_bl {
 };
 
 static struct fujitsu_bl *fujitsu_bl;
-static int use_alt_lcd_levels = -1;
-static bool disable_brightness_adjust;
 
 /* Device used to access hotkeys and other features on the laptop */
 struct fujitsu_laptop {
@@ -256,9 +264,11 @@ static int bl_update_status(struct backlight_device *b)
 
        if (fext) {
                if (b->props.power == FB_BLANK_POWERDOWN)
-                       call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x3);
+                       call_fext_func(fext, FUNC_BACKLIGHT, 0x1,
+                                      BACKLIGHT_PARAM_POWER, BACKLIGHT_OFF);
                else
-                       call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x0);
+                       call_fext_func(fext, FUNC_BACKLIGHT, 0x1,
+                                      BACKLIGHT_PARAM_POWER, BACKLIGHT_ON);
        }
 
        return set_lcd_level(device, b->props.brightness);
@@ -385,7 +395,7 @@ static int fujitsu_backlight_register(struct acpi_device *device)
 static int acpi_fujitsu_bl_add(struct acpi_device *device)
 {
        struct fujitsu_bl *priv;
-       int error;
+       int ret;
 
        if (acpi_video_get_backlight_type() != acpi_backlight_vendor)
                return -ENODEV;
@@ -399,10 +409,6 @@ static int acpi_fujitsu_bl_add(struct acpi_device *device)
        strcpy(acpi_device_class(device), ACPI_FUJITSU_CLASS);
        device->driver_data = priv;
 
-       error = acpi_fujitsu_bl_input_setup(device);
-       if (error)
-               return error;
-
        pr_info("ACPI: %s [%s]\n",
                acpi_device_name(device), acpi_device_bid(device));
 
@@ -410,11 +416,11 @@ static int acpi_fujitsu_bl_add(struct acpi_device *device)
                priv->max_brightness = FUJITSU_LCD_N_LEVELS;
        get_lcd_level(device);
 
-       error = fujitsu_backlight_register(device);
-       if (error)
-               return error;
+       ret = acpi_fujitsu_bl_input_setup(device);
+       if (ret)
+               return ret;
 
-       return 0;
+       return fujitsu_backlight_register(device);
 }
 
 /* Brightness notify */
@@ -424,7 +430,7 @@ static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event)
        struct fujitsu_bl *priv = acpi_driver_data(device);
        int oldb, newb;
 
-       if (event != ACPI_FUJITSU_NOTIFY_CODE1) {
+       if (event != ACPI_FUJITSU_NOTIFY_CODE) {
                acpi_handle_info(device->handle, "unsupported event [0x%x]\n",
                                 event);
                sparse_keymap_report_event(priv->input, -1, 1, true);
@@ -455,7 +461,9 @@ static const struct key_entry keymap_default[] = {
        { KE_KEY, KEY3_CODE, { KEY_PROG3 } },
        { KE_KEY, KEY4_CODE, { KEY_PROG4 } },
        { KE_KEY, KEY5_CODE, { KEY_RFKILL } },
+       { KE_KEY, BIT(5),    { KEY_RFKILL } },
        { KE_KEY, BIT(26),   { KEY_TOUCHPAD_TOGGLE } },
+       { KE_KEY, BIT(29),   { KEY_MICMUTE } },
        { KE_END, 0 }
 };
 
@@ -693,7 +701,7 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 {
        struct fujitsu_laptop *priv = acpi_driver_data(device);
        struct led_classdev *led;
-       int result;
+       int ret;
 
        if (call_fext_func(device,
                           FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) {
@@ -704,9 +712,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
                led->name = "fujitsu::logolamp";
                led->brightness_set_blocking = logolamp_set;
                led->brightness_get = logolamp_get;
-               result = devm_led_classdev_register(&device->dev, led);
-               if (result)
-                       return result;
+               ret = devm_led_classdev_register(&device->dev, led);
+               if (ret)
+                       return ret;
        }
 
        if ((call_fext_func(device,
@@ -719,9 +727,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
                led->name = "fujitsu::kblamps";
                led->brightness_set_blocking = kblamps_set;
                led->brightness_get = kblamps_get;
-               result = devm_led_classdev_register(&device->dev, led);
-               if (result)
-                       return result;
+               ret = devm_led_classdev_register(&device->dev, led);
+               if (ret)
+                       return ret;
        }
 
        /*
@@ -742,9 +750,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
                led->brightness_set_blocking = radio_led_set;
                led->brightness_get = radio_led_get;
                led->default_trigger = "rfkill-any";
-               result = devm_led_classdev_register(&device->dev, led);
-               if (result)
-                       return result;
+               ret = devm_led_classdev_register(&device->dev, led);
+               if (ret)
+                       return ret;
        }
 
        /* Support for eco led is not always signaled in bit corresponding
@@ -762,9 +770,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
                led->name = "fujitsu::eco_led";
                led->brightness_set_blocking = eco_led_set;
                led->brightness_get = eco_led_get;
-               result = devm_led_classdev_register(&device->dev, led);
-               if (result)
-                       return result;
+               ret = devm_led_classdev_register(&device->dev, led);
+               if (ret)
+                       return ret;
        }
 
        return 0;
@@ -773,8 +781,7 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 static int acpi_fujitsu_laptop_add(struct acpi_device *device)
 {
        struct fujitsu_laptop *priv;
-       int error;
-       int i;
+       int ret, i = 0;
 
        priv = devm_kzalloc(&device->dev, sizeof(*priv), GFP_KERNEL);
        if (!priv)
@@ -789,23 +796,16 @@ static int acpi_fujitsu_laptop_add(struct acpi_device *device)
 
        /* kfifo */
        spin_lock_init(&priv->fifo_lock);
-       error = kfifo_alloc(&priv->fifo, RINGBUFFERSIZE * sizeof(int),
-                           GFP_KERNEL);
-       if (error) {
-               pr_err("kfifo_alloc failed\n");
-               goto err_stop;
-       }
-
-       error = acpi_fujitsu_laptop_input_setup(device);
-       if (error)
-               goto err_free_fifo;
+       ret = kfifo_alloc(&priv->fifo, RINGBUFFERSIZE * sizeof(int),
+                         GFP_KERNEL);
+       if (ret)
+               return ret;
 
        pr_info("ACPI: %s [%s]\n",
                acpi_device_name(device), acpi_device_bid(device));
 
-       i = 0;
-       while (call_fext_func(device, FUNC_BUTTONS, 0x1, 0x0, 0x0) != 0
-               && (i++) < MAX_HOTKEY_RINGBUFFER_SIZE)
+       while (call_fext_func(device, FUNC_BUTTONS, 0x1, 0x0, 0x0) != 0 &&
+              i++ < MAX_HOTKEY_RINGBUFFER_SIZE)
                ; /* No action, result is discarded */
        acpi_handle_debug(device->handle, "Discarded %i ringbuffer entries\n",
                          i);
@@ -829,26 +829,31 @@ static int acpi_fujitsu_laptop_add(struct acpi_device *device)
        /* Sync backlight power status */
        if (fujitsu_bl && fujitsu_bl->bl_device &&
            acpi_video_get_backlight_type() == acpi_backlight_vendor) {
-               if (call_fext_func(fext, FUNC_BACKLIGHT, 0x2, 0x4, 0x0) == 3)
+               if (call_fext_func(fext, FUNC_BACKLIGHT, 0x2,
+                                  BACKLIGHT_PARAM_POWER, 0x0) == BACKLIGHT_OFF)
                        fujitsu_bl->bl_device->props.power = FB_BLANK_POWERDOWN;
                else
                        fujitsu_bl->bl_device->props.power = FB_BLANK_UNBLANK;
        }
 
-       error = acpi_fujitsu_laptop_leds_register(device);
-       if (error)
+       ret = acpi_fujitsu_laptop_input_setup(device);
+       if (ret)
+               goto err_free_fifo;
+
+       ret = acpi_fujitsu_laptop_leds_register(device);
+       if (ret)
                goto err_free_fifo;
 
-       error = fujitsu_laptop_platform_add(device);
-       if (error)
+       ret = fujitsu_laptop_platform_add(device);
+       if (ret)
                goto err_free_fifo;
 
        return 0;
 
 err_free_fifo:
        kfifo_free(&priv->fifo);
-err_stop:
-       return error;
+
+       return ret;
 }
 
 static int acpi_fujitsu_laptop_remove(struct acpi_device *device)
@@ -865,11 +870,11 @@ static int acpi_fujitsu_laptop_remove(struct acpi_device *device)
 static void acpi_fujitsu_laptop_press(struct acpi_device *device, int scancode)
 {
        struct fujitsu_laptop *priv = acpi_driver_data(device);
-       int status;
+       int ret;
 
-       status = kfifo_in_locked(&priv->fifo, (unsigned char *)&scancode,
-                                sizeof(scancode), &priv->fifo_lock);
-       if (status != sizeof(scancode)) {
+       ret = kfifo_in_locked(&priv->fifo, (unsigned char *)&scancode,
+                             sizeof(scancode), &priv->fifo_lock);
+       if (ret != sizeof(scancode)) {
                dev_info(&priv->input->dev, "Could not push scancode [0x%x]\n",
                         scancode);
                return;
@@ -882,13 +887,12 @@ static void acpi_fujitsu_laptop_press(struct acpi_device *device, int scancode)
 static void acpi_fujitsu_laptop_release(struct acpi_device *device)
 {
        struct fujitsu_laptop *priv = acpi_driver_data(device);
-       int scancode, status;
+       int scancode, ret;
 
        while (true) {
-               status = kfifo_out_locked(&priv->fifo,
-                                         (unsigned char *)&scancode,
-                                         sizeof(scancode), &priv->fifo_lock);
-               if (status != sizeof(scancode))
+               ret = kfifo_out_locked(&priv->fifo, (unsigned char *)&scancode,
+                                      sizeof(scancode), &priv->fifo_lock);
+               if (ret != sizeof(scancode))
                        return;
                sparse_keymap_report_event(priv->input, scancode, 0, false);
                dev_dbg(&priv->input->dev,
@@ -899,10 +903,10 @@ static void acpi_fujitsu_laptop_release(struct acpi_device *device)
 static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event)
 {
        struct fujitsu_laptop *priv = acpi_driver_data(device);
-       int scancode, i = 0;
+       int scancode, i = 0, ret;
        unsigned int irb;
 
-       if (event != ACPI_FUJITSU_NOTIFY_CODE1) {
+       if (event != ACPI_FUJITSU_NOTIFY_CODE) {
                acpi_handle_info(device->handle, "Unsupported event [0x%x]\n",
                                 event);
                sparse_keymap_report_event(priv->input, -1, 1, true);
@@ -930,9 +934,18 @@ static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event)
         * E736/E746/E756), the touchpad toggle hotkey (Fn+F4) is
         * handled in software; its state is queried using FUNC_FLAGS
         */
-       if ((priv->flags_supported & BIT(26)) &&
-           (call_fext_func(device, FUNC_FLAGS, 0x1, 0x0, 0x0) & BIT(26)))
-               sparse_keymap_report_event(priv->input, BIT(26), 1, true);
+       if (priv->flags_supported & (BIT(5) | BIT(26) | BIT(29))) {
+               ret = call_fext_func(device, FUNC_FLAGS, 0x1, 0x0, 0x0);
+               if (ret & BIT(5))
+                       sparse_keymap_report_event(priv->input,
+                                                  BIT(5), 1, true);
+               if (ret & BIT(26))
+                       sparse_keymap_report_event(priv->input,
+                                                  BIT(26), 1, true);
+               if (ret & BIT(29))
+                       sparse_keymap_report_event(priv->input,
+                                                  BIT(29), 1, true);
+       }
 }
 
 /* Initialization */
index 2d645c505f8190b02b875177869fa158032375f3..be85ed966bf33bd7b809440f53dd0dac1b405273 100644 (file)
 static int temp_limits[3] = { 55000, 60000, 65000 };
 module_param_array(temp_limits, int, NULL, 0444);
 MODULE_PARM_DESC(temp_limits,
-                "Milli-celcius values above which the fan speed increases");
+                "Millicelsius values above which the fan speed increases");
 
 static int hysteresis = 3000;
 module_param(hysteresis, int, 0444);
 MODULE_PARM_DESC(hysteresis,
-                "Hysteresis in milli-celcius before lowering the fan speed");
+                "Hysteresis in millicelsius before lowering the fan speed");
 
 static int speed_on_ac = 2;
 module_param(speed_on_ac, int, 0444);
index 5e3df194723e1a1f1757fff08dd3c89069381350..b5adba2277832262883ef7034460898138de32ff 100644 (file)
  *
  */
 
+#include <linux/acpi.h>
+#include <linux/dmi.h>
+#include <linux/input.h>
+#include <linux/input/sparse-keymap.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/init.h>
-#include <linux/input.h>
 #include <linux/platform_device.h>
-#include <linux/input/sparse-keymap.h>
-#include <linux/acpi.h>
 #include <linux/suspend.h>
-#include <acpi/acpi_bus.h>
-#include <linux/dmi.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Alex Hung");
@@ -67,8 +65,8 @@ static const struct key_entry intel_array_keymap[] = {
        { KE_IGNORE, 0xC5, { KEY_VOLUMEUP } },                /* Release */
        { KE_KEY,    0xC6, { KEY_VOLUMEDOWN } },              /* Press */
        { KE_IGNORE, 0xC7, { KEY_VOLUMEDOWN } },              /* Release */
-       { KE_SW,     0xC8, { .sw = { SW_ROTATE_LOCK, 1 } } }, /* Press */
-       { KE_SW,     0xC9, { .sw = { SW_ROTATE_LOCK, 0 } } }, /* Release */
+       { KE_KEY,    0xC8, { KEY_ROTATE_LOCK_TOGGLE } },      /* Press */
+       { KE_IGNORE, 0xC9, { KEY_ROTATE_LOCK_TOGGLE } },      /* Release */
        { KE_KEY,    0xCE, { KEY_POWER } },                   /* Press */
        { KE_IGNORE, 0xCF, { KEY_POWER } },                   /* Release */
        { KE_END },
index d4ea01805879b4c9049b1657fad6f6ddb9de27d9..a6d5aa0c3c479dbd7f345a3f5666ae0ef72e456d 100644 (file)
@@ -138,9 +138,6 @@ static int __init itmt_legacy_init(void)
        if (!id)
                return -ENODEV;
 
-       if (boot_cpu_has(X86_FEATURE_HWP))
-               return -ENODEV;
-
        ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
                                "platform/x86/turbo_max_3:online",
                                itmt_legacy_cpu_online, NULL);
index 454e14f022855000ecfb9961388c812e6763a20d..7a0bd24c1ae2dcd5304150d52a54d124192e753a 100644 (file)
 #define MLXPLAT_CPLD_FAN_MASK          GENMASK(3, 0)
 #define MLXPLAT_CPLD_FAN_NG_MASK       GENMASK(5, 0)
 
+/* Default I2C parent bus number */
+#define MLXPLAT_CPLD_PHYS_ADAPTER_DEF_NR       1
+
+/* Maximum number of possible physical buses equipped on system */
+#define MLXPLAT_CPLD_MAX_PHYS_ADAPTER_NUM      16
+
+/* Number of channels in group */
+#define MLXPLAT_CPLD_GRP_CHNL_NUM              8
+
 /* Start channel numbers */
 #define MLXPLAT_CPLD_CH1                       2
 #define MLXPLAT_CPLD_CH2                       10
@@ -124,7 +133,7 @@ static const struct resource mlxplat_lpc_resources[] = {
 };
 
 /* Platform default channels */
-static const int mlxplat_default_channels[][8] = {
+static const int mlxplat_default_channels[][MLXPLAT_CPLD_GRP_CHNL_NUM] = {
        {
                MLXPLAT_CPLD_CH1, MLXPLAT_CPLD_CH1 + 1, MLXPLAT_CPLD_CH1 + 2,
                MLXPLAT_CPLD_CH1 + 3, MLXPLAT_CPLD_CH1 + 4, MLXPLAT_CPLD_CH1 +
@@ -694,6 +703,8 @@ static int __init mlxplat_dmi_default_matched(const struct dmi_system_id *dmi)
                                ARRAY_SIZE(mlxplat_default_channels[i]);
        }
        mlxplat_hotplug = &mlxplat_mlxcpld_default_data;
+       mlxplat_hotplug->deferred_nr =
+               mlxplat_default_channels[i - 1][MLXPLAT_CPLD_GRP_CHNL_NUM - 1];
 
        return 1;
 };
@@ -708,6 +719,8 @@ static int __init mlxplat_dmi_msn21xx_matched(const struct dmi_system_id *dmi)
                                ARRAY_SIZE(mlxplat_msn21xx_channels);
        }
        mlxplat_hotplug = &mlxplat_mlxcpld_msn21xx_data;
+       mlxplat_hotplug->deferred_nr =
+               mlxplat_msn21xx_channels[MLXPLAT_CPLD_GRP_CHNL_NUM - 1];
 
        return 1;
 };
@@ -722,6 +735,8 @@ static int __init mlxplat_dmi_msn274x_matched(const struct dmi_system_id *dmi)
                                ARRAY_SIZE(mlxplat_msn21xx_channels);
        }
        mlxplat_hotplug = &mlxplat_mlxcpld_msn274x_data;
+       mlxplat_hotplug->deferred_nr =
+               mlxplat_msn21xx_channels[MLXPLAT_CPLD_GRP_CHNL_NUM - 1];
 
        return 1;
 };
@@ -736,6 +751,8 @@ static int __init mlxplat_dmi_msn201x_matched(const struct dmi_system_id *dmi)
                                ARRAY_SIZE(mlxplat_msn21xx_channels);
        }
        mlxplat_hotplug = &mlxplat_mlxcpld_msn201x_data;
+       mlxplat_hotplug->deferred_nr =
+               mlxplat_default_channels[i - 1][MLXPLAT_CPLD_GRP_CHNL_NUM - 1];
 
        return 1;
 };
@@ -750,6 +767,8 @@ static int __init mlxplat_dmi_qmb7xx_matched(const struct dmi_system_id *dmi)
                                ARRAY_SIZE(mlxplat_msn21xx_channels);
        }
        mlxplat_hotplug = &mlxplat_mlxcpld_default_ng_data;
+       mlxplat_hotplug->deferred_nr =
+               mlxplat_msn21xx_channels[MLXPLAT_CPLD_GRP_CHNL_NUM - 1];
 
        return 1;
 };
@@ -830,10 +849,48 @@ static const struct dmi_system_id mlxplat_dmi_table[] __initconst = {
 
 MODULE_DEVICE_TABLE(dmi, mlxplat_dmi_table);
 
+static int mlxplat_mlxcpld_verify_bus_topology(int *nr)
+{
+       struct i2c_adapter *search_adap;
+       int shift, i;
+
+       /* Scan adapters from expected id to verify it is free. */
+       *nr = MLXPLAT_CPLD_PHYS_ADAPTER_DEF_NR;
+       for (i = MLXPLAT_CPLD_PHYS_ADAPTER_DEF_NR; i <
+            MLXPLAT_CPLD_MAX_PHYS_ADAPTER_NUM; i++) {
+               search_adap = i2c_get_adapter(i);
+               if (search_adap) {
+                       i2c_put_adapter(search_adap);
+                       continue;
+               }
+
+               /* Return if expected parent adapter is free. */
+               if (i == MLXPLAT_CPLD_PHYS_ADAPTER_DEF_NR)
+                       return 0;
+               break;
+       }
+
+       /* Return with error if free id for adapter is not found. */
+       if (i == MLXPLAT_CPLD_MAX_PHYS_ADAPTER_NUM)
+               return -ENODEV;
+
+       /* Shift adapter ids, since expected parent adapter is not free. */
+       *nr = i;
+       for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) {
+               shift = *nr - mlxplat_mux_data[i].parent;
+               mlxplat_mux_data[i].parent = *nr;
+               mlxplat_mux_data[i].base_nr += shift;
+               if (shift > 0)
+                       mlxplat_hotplug->shift_nr = shift;
+       }
+
+       return 0;
+}
+
 static int __init mlxplat_init(void)
 {
        struct mlxplat_priv *priv;
-       int i, err;
+       int i, nr, err;
 
        if (!dmi_check_system(mlxplat_dmi_table))
                return -ENODEV;
@@ -853,7 +910,12 @@ static int __init mlxplat_init(void)
        }
        platform_set_drvdata(mlxplat_dev, priv);
 
-       priv->pdev_i2c = platform_device_register_simple("i2c_mlxcpld", -1,
+       err = mlxplat_mlxcpld_verify_bus_topology(&nr);
+       if (nr < 0)
+               goto fail_alloc;
+
+       nr = (nr == MLXPLAT_CPLD_MAX_PHYS_ADAPTER_NUM) ? -1 : nr;
+       priv->pdev_i2c = platform_device_register_simple("i2c_mlxcpld", nr,
                                                         NULL, 0);
        if (IS_ERR(priv->pdev_i2c)) {
                err = PTR_ERR(priv->pdev_i2c);
index 3a624090191dd1ed0314b44b9a8c949aec8f70ac..452aacabaa8efc71115bd4c107e485f490fe138d 100644 (file)
@@ -446,6 +446,23 @@ static const struct dmi_system_id silead_ts_dmi_table[] = {
                        DMI_MATCH(DMI_BOARD_NAME, "X3 Plus"),
                },
        },
+       {
+               /* I.T.Works TW701 */
+               .driver_data = (void *)&surftab_wintron70_st70416_6_data,
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Insyde"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "i71c"),
+                       DMI_MATCH(DMI_BIOS_VERSION, "itWORKS.G.WI71C.JGBMRB"),
+               },
+       },
+       {
+               /* Yours Y8W81, same case and touchscreen as Chuwi Vi8 */
+               .driver_data = (void *)&chuwi_vi8_data,
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "YOURS"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Y8W81"),
+               },
+       },
        { },
 };
 
index 1c57ee2b6d190501dd3dffe47e82fd1cf628fc4c..da1ca4856ea192afefa641c9511c45dc7d34aa01 100644 (file)
@@ -8703,16 +8703,24 @@ static const struct attribute_group fan_attr_group = {
          .ec = TPID(__id1, __id2),             \
          .quirks = __quirks }
 
+#define TPACPI_FAN_QB(__id1, __id2, __quirks)  \
+       { .vendor = PCI_VENDOR_ID_LENOVO,       \
+         .bios = TPID(__id1, __id2),           \
+         .ec = TPACPI_MATCH_ANY,               \
+         .quirks = __quirks }
+
 static const struct tpacpi_quirk fan_quirk_table[] __initconst = {
        TPACPI_FAN_QI('1', 'Y', TPACPI_FAN_Q1),
        TPACPI_FAN_QI('7', '8', TPACPI_FAN_Q1),
        TPACPI_FAN_QI('7', '6', TPACPI_FAN_Q1),
        TPACPI_FAN_QI('7', '0', TPACPI_FAN_Q1),
        TPACPI_FAN_QL('7', 'M', TPACPI_FAN_2FAN),
+       TPACPI_FAN_QB('N', '1', TPACPI_FAN_2FAN),
 };
 
 #undef TPACPI_FAN_QL
 #undef TPACPI_FAN_QI
+#undef TPACPI_FAN_QB
 
 static int __init fan_init(struct ibm_init_struct *iibm)
 {
index 1032c00b907b99d2ae0f70a6f0ebab76698a98d1..f7761d98c0fd00aeffb19a9192bd8e1a2e45a941 100644 (file)
@@ -1,14 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * ACPI driver for Topstar notebooks (hotkeys support only)
+ * Topstar Laptop ACPI Extras driver
  *
  * Copyright (c) 2009 Herton Ronaldo Krzesinski <herton@mandriva.com.br>
+ * Copyright (c) 2018 Guillaume Douézan-Grard
  *
  * Implementation inspired by existing x86 platform drivers, in special
- * asus/eepc/fujitsu-laptop, thanks to their authors
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * asus/eepc/fujitsu-laptop, thanks to their authors.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <linux/input.h>
 #include <linux/input/sparse-keymap.h>
+#include <linux/leds.h>
+#include <linux/platform_device.h>
 
-#define ACPI_TOPSTAR_CLASS "topstar"
+#define TOPSTAR_LAPTOP_CLASS "topstar"
 
-struct topstar_hkey {
-       struct input_dev *inputdev;
+struct topstar_laptop {
+       struct acpi_device *device;
+       struct platform_device *platform;
+       struct input_dev *input;
+       struct led_classdev led;
 };
 
+/*
+ * LED
+ */
+
+static enum led_brightness topstar_led_get(struct led_classdev *led)
+{
+       return led->brightness;
+}
+
+static int topstar_led_set(struct led_classdev *led,
+               enum led_brightness state)
+{
+       struct topstar_laptop *topstar = container_of(led,
+                       struct topstar_laptop, led);
+
+       struct acpi_object_list params;
+       union acpi_object in_obj;
+       unsigned long long int ret;
+       acpi_status status;
+
+       params.count = 1;
+       params.pointer = &in_obj;
+       in_obj.type = ACPI_TYPE_INTEGER;
+       in_obj.integer.value = 0x83;
+
+       /*
+        * Topstar ACPI returns 0x30001 when the LED is ON and 0x30000 when it
+        * is OFF.
+        */
+       status = acpi_evaluate_integer(topstar->device->handle,
+                       "GETX", &params, &ret);
+       if (ACPI_FAILURE(status))
+               return -1;
+
+       /*
+        * FNCX(0x83) toggles the LED (more precisely, it is supposed to
+        * act as an hardware switch and disconnect the WLAN adapter but
+        * it seems to be faulty on some models like the Topstar U931
+        * Notebook).
+        */
+       if ((ret == 0x30001 && state == LED_OFF)
+                       || (ret == 0x30000 && state != LED_OFF)) {
+               status = acpi_execute_simple_method(topstar->device->handle,
+                               "FNCX", 0x83);
+               if (ACPI_FAILURE(status))
+                       return -1;
+       }
+
+       return 0;
+}
+
+static int topstar_led_init(struct topstar_laptop *topstar)
+{
+       topstar->led = (struct led_classdev) {
+               .default_trigger = "rfkill0",
+               .brightness_get = topstar_led_get,
+               .brightness_set_blocking = topstar_led_set,
+               .name = TOPSTAR_LAPTOP_CLASS "::wlan",
+       };
+
+       return led_classdev_register(&topstar->platform->dev, &topstar->led);
+}
+
+static void topstar_led_exit(struct topstar_laptop *topstar)
+{
+       led_classdev_unregister(&topstar->led);
+}
+
+/*
+ * Input
+ */
+
 static const struct key_entry topstar_keymap[] = {
        { KE_KEY, 0x80, { KEY_BRIGHTNESSUP } },
        { KE_KEY, 0x81, { KEY_BRIGHTNESSDOWN } },
@@ -57,107 +133,217 @@ static const struct key_entry topstar_keymap[] = {
        { KE_END, 0 }
 };
 
-static void acpi_topstar_notify(struct acpi_device *device, u32 event)
+static void topstar_input_notify(struct topstar_laptop *topstar, int event)
 {
-       static bool dup_evnt[2];
-       bool *dup;
-       struct topstar_hkey *hkey = acpi_driver_data(device);
-
-       /* 0x83 and 0x84 key events comes duplicated... */
-       if (event == 0x83 || event == 0x84) {
-               dup = &dup_evnt[event - 0x83];
-               if (*dup) {
-                       *dup = false;
-                       return;
-               }
-               *dup = true;
-       }
-
-       if (!sparse_keymap_report_event(hkey->inputdev, event, 1, true))
+       if (!sparse_keymap_report_event(topstar->input, event, 1, true))
                pr_info("unknown event = 0x%02x\n", event);
 }
 
-static int acpi_topstar_fncx_switch(struct acpi_device *device, bool state)
-{
-       acpi_status status;
-
-       status = acpi_execute_simple_method(device->handle, "FNCX",
-                                               state ? 0x86 : 0x87);
-       if (ACPI_FAILURE(status)) {
-               pr_err("Unable to switch FNCX notifications\n");
-               return -ENODEV;
-       }
-
-       return 0;
-}
-
-static int acpi_topstar_init_hkey(struct topstar_hkey *hkey)
+static int topstar_input_init(struct topstar_laptop *topstar)
 {
        struct input_dev *input;
-       int error;
+       int err;
 
        input = input_allocate_device();
        if (!input)
                return -ENOMEM;
 
        input->name = "Topstar Laptop extra buttons";
-       input->phys = "topstar/input0";
+       input->phys = TOPSTAR_LAPTOP_CLASS "/input0";
        input->id.bustype = BUS_HOST;
+       input->dev.parent = &topstar->platform->dev;
 
-       error = sparse_keymap_setup(input, topstar_keymap, NULL);
-       if (error) {
+       err = sparse_keymap_setup(input, topstar_keymap, NULL);
+       if (err) {
                pr_err("Unable to setup input device keymap\n");
                goto err_free_dev;
        }
 
-       error = input_register_device(input);
-       if (error) {
+       err = input_register_device(input);
+       if (err) {
                pr_err("Unable to register input device\n");
                goto err_free_dev;
        }
 
-       hkey->inputdev = input;
+       topstar->input = input;
        return 0;
 
- err_free_dev:
+err_free_dev:
        input_free_device(input);
-       return error;
+       return err;
 }
 
-static int acpi_topstar_add(struct acpi_device *device)
+static void topstar_input_exit(struct topstar_laptop *topstar)
 {
-       struct topstar_hkey *tps_hkey;
+       input_unregister_device(topstar->input);
+}
 
-       tps_hkey = kzalloc(sizeof(struct topstar_hkey), GFP_KERNEL);
-       if (!tps_hkey)
+/*
+ * Platform
+ */
+
+static struct platform_driver topstar_platform_driver = {
+       .driver = {
+               .name = TOPSTAR_LAPTOP_CLASS,
+       },
+};
+
+static int topstar_platform_init(struct topstar_laptop *topstar)
+{
+       int err;
+
+       topstar->platform = platform_device_alloc(TOPSTAR_LAPTOP_CLASS, -1);
+       if (!topstar->platform)
                return -ENOMEM;
 
-       strcpy(acpi_device_name(device), "Topstar TPSACPI");
-       strcpy(acpi_device_class(device), ACPI_TOPSTAR_CLASS);
+       platform_set_drvdata(topstar->platform, topstar);
+
+       err = platform_device_add(topstar->platform);
+       if (err)
+               goto err_device_put;
+
+       return 0;
 
-       if (acpi_topstar_fncx_switch(device, true))
-               goto add_err;
+err_device_put:
+       platform_device_put(topstar->platform);
+       return err;
+}
+
+static void topstar_platform_exit(struct topstar_laptop *topstar)
+{
+       platform_device_unregister(topstar->platform);
+}
+
+/*
+ * ACPI
+ */
+
+static int topstar_acpi_fncx_switch(struct acpi_device *device, bool state)
+{
+       acpi_status status;
+       u64 arg = state ? 0x86 : 0x87;
 
-       if (acpi_topstar_init_hkey(tps_hkey))
-               goto add_err;
+       status = acpi_execute_simple_method(device->handle, "FNCX", arg);
+       if (ACPI_FAILURE(status)) {
+               pr_err("Unable to switch FNCX notifications\n");
+               return -ENODEV;
+       }
 
-       device->driver_data = tps_hkey;
        return 0;
+}
 
-add_err:
-       kfree(tps_hkey);
-       return -ENODEV;
+static void topstar_acpi_notify(struct acpi_device *device, u32 event)
+{
+       struct topstar_laptop *topstar = acpi_driver_data(device);
+       static bool dup_evnt[2];
+       bool *dup;
+
+       /* 0x83 and 0x84 key events comes duplicated... */
+       if (event == 0x83 || event == 0x84) {
+               dup = &dup_evnt[event - 0x83];
+               if (*dup) {
+                       *dup = false;
+                       return;
+               }
+               *dup = true;
+       }
+
+       topstar_input_notify(topstar, event);
 }
 
-static int acpi_topstar_remove(struct acpi_device *device)
+static int topstar_acpi_init(struct topstar_laptop *topstar)
 {
-       struct topstar_hkey *tps_hkey = acpi_driver_data(device);
+       return topstar_acpi_fncx_switch(topstar->device, true);
+}
 
-       acpi_topstar_fncx_switch(device, false);
+static void topstar_acpi_exit(struct topstar_laptop *topstar)
+{
+       topstar_acpi_fncx_switch(topstar->device, false);
+}
 
-       input_unregister_device(tps_hkey->inputdev);
-       kfree(tps_hkey);
+/*
+ * Enable software-based WLAN LED control on systems with defective
+ * hardware switch.
+ */
+static bool led_workaround;
 
+static int dmi_led_workaround(const struct dmi_system_id *id)
+{
+       led_workaround = true;
+       return 0;
+}
+
+static const struct dmi_system_id topstar_dmi_ids[] = {
+       {
+               .callback = dmi_led_workaround,
+               .ident = "Topstar U931/RVP7",
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_NAME, "U931"),
+                       DMI_MATCH(DMI_BOARD_VERSION, "RVP7"),
+               },
+       },
+       {}
+};
+
+static int topstar_acpi_add(struct acpi_device *device)
+{
+       struct topstar_laptop *topstar;
+       int err;
+
+       dmi_check_system(topstar_dmi_ids);
+
+       topstar = kzalloc(sizeof(struct topstar_laptop), GFP_KERNEL);
+       if (!topstar)
+               return -ENOMEM;
+
+       strcpy(acpi_device_name(device), "Topstar TPSACPI");
+       strcpy(acpi_device_class(device), TOPSTAR_LAPTOP_CLASS);
+       device->driver_data = topstar;
+       topstar->device = device;
+
+       err = topstar_acpi_init(topstar);
+       if (err)
+               goto err_free;
+
+       err = topstar_platform_init(topstar);
+       if (err)
+               goto err_acpi_exit;
+
+       err = topstar_input_init(topstar);
+       if (err)
+               goto err_platform_exit;
+
+       if (led_workaround) {
+               err = topstar_led_init(topstar);
+               if (err)
+                       goto err_input_exit;
+       }
+
+       return 0;
+
+err_input_exit:
+       topstar_input_exit(topstar);
+err_platform_exit:
+       topstar_platform_exit(topstar);
+err_acpi_exit:
+       topstar_acpi_exit(topstar);
+err_free:
+       kfree(topstar);
+       return err;
+}
+
+static int topstar_acpi_remove(struct acpi_device *device)
+{
+       struct topstar_laptop *topstar = acpi_driver_data(device);
+
+       if (led_workaround)
+               topstar_led_exit(topstar);
+
+       topstar_input_exit(topstar);
+       topstar_platform_exit(topstar);
+       topstar_acpi_exit(topstar);
+
+       kfree(topstar);
        return 0;
 }
 
@@ -168,18 +354,47 @@ static const struct acpi_device_id topstar_device_ids[] = {
 };
 MODULE_DEVICE_TABLE(acpi, topstar_device_ids);
 
-static struct acpi_driver acpi_topstar_driver = {
+static struct acpi_driver topstar_acpi_driver = {
        .name = "Topstar laptop ACPI driver",
-       .class = ACPI_TOPSTAR_CLASS,
+       .class = TOPSTAR_LAPTOP_CLASS,
        .ids = topstar_device_ids,
        .ops = {
-               .add = acpi_topstar_add,
-               .remove = acpi_topstar_remove,
-               .notify = acpi_topstar_notify,
+               .add = topstar_acpi_add,
+               .remove = topstar_acpi_remove,
+               .notify = topstar_acpi_notify,
        },
 };
-module_acpi_driver(acpi_topstar_driver);
+
+static int __init topstar_laptop_init(void)
+{
+       int ret;
+
+       ret = platform_driver_register(&topstar_platform_driver);
+       if (ret < 0)
+               return ret;
+
+       ret = acpi_bus_register_driver(&topstar_acpi_driver);
+       if (ret < 0)
+               goto err_driver_unreg;
+
+       pr_info("ACPI extras driver loaded\n");
+       return 0;
+
+err_driver_unreg:
+       platform_driver_unregister(&topstar_platform_driver);
+       return ret;
+}
+
+static void __exit topstar_laptop_exit(void)
+{
+       acpi_bus_unregister_driver(&topstar_acpi_driver);
+       platform_driver_unregister(&topstar_platform_driver);
+}
+
+module_init(topstar_laptop_init);
+module_exit(topstar_laptop_exit);
 
 MODULE_AUTHOR("Herton Ronaldo Krzesinski");
+MODULE_AUTHOR("Guillaume Douézan-Grard");
 MODULE_DESCRIPTION("Topstar Laptop ACPI Extras driver");
 MODULE_LICENSE("GPL");
index 8796211ef24acdab6e20f976716c18577d5a8fed..8e3d0146ff8c30332184d7aa35f4cae748a0b4bf 100644 (file)
@@ -130,13 +130,11 @@ static bool find_guid(const char *guid_string, struct wmi_block **out)
        uuid_le guid_input;
        struct wmi_block *wblock;
        struct guid_block *block;
-       struct list_head *p;
 
        if (uuid_le_to_bin(guid_string, &guid_input))
                return false;
 
-       list_for_each(p, &wmi_block_list) {
-               wblock = list_entry(p, struct wmi_block, list);
+       list_for_each_entry(wblock, &wmi_block_list, list) {
                block = &wblock->gblock;
 
                if (memcmp(block->guid, &guid_input, 16) == 0) {
@@ -519,7 +517,6 @@ wmi_notify_handler handler, void *data)
        struct wmi_block *block;
        acpi_status status = AE_NOT_EXIST;
        uuid_le guid_input;
-       struct list_head *p;
 
        if (!guid || !handler)
                return AE_BAD_PARAMETER;
@@ -527,9 +524,8 @@ wmi_notify_handler handler, void *data)
        if (uuid_le_to_bin(guid, &guid_input))
                return AE_BAD_PARAMETER;
 
-       list_for_each(p, &wmi_block_list) {
+       list_for_each_entry(block, &wmi_block_list, list) {
                acpi_status wmi_status;
-               block = list_entry(p, struct wmi_block, list);
 
                if (memcmp(block->gblock.guid, &guid_input, 16) == 0) {
                        if (block->handler &&
@@ -560,7 +556,6 @@ acpi_status wmi_remove_notify_handler(const char *guid)
        struct wmi_block *block;
        acpi_status status = AE_NOT_EXIST;
        uuid_le guid_input;
-       struct list_head *p;
 
        if (!guid)
                return AE_BAD_PARAMETER;
@@ -568,9 +563,8 @@ acpi_status wmi_remove_notify_handler(const char *guid)
        if (uuid_le_to_bin(guid, &guid_input))
                return AE_BAD_PARAMETER;
 
-       list_for_each(p, &wmi_block_list) {
+       list_for_each_entry(block, &wmi_block_list, list) {
                acpi_status wmi_status;
-               block = list_entry(p, struct wmi_block, list);
 
                if (memcmp(block->gblock.guid, &guid_input, 16) == 0) {
                        if (!block->handler ||
@@ -610,15 +604,13 @@ acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out)
        union acpi_object params[1];
        struct guid_block *gblock;
        struct wmi_block *wblock;
-       struct list_head *p;
 
        input.count = 1;
        input.pointer = params;
        params[0].type = ACPI_TYPE_INTEGER;
        params[0].integer.value = event;
 
-       list_for_each(p, &wmi_block_list) {
-               wblock = list_entry(p, struct wmi_block, list);
+       list_for_each_entry(wblock, &wmi_block_list, list) {
                gblock = &wblock->gblock;
 
                if ((gblock->flags & ACPI_WMI_EVENT) &&
@@ -933,12 +925,11 @@ static int wmi_dev_probe(struct device *dev)
                        goto probe_failure;
                }
 
-               buf = kmalloc(strlen(wdriver->driver.name) + 5, GFP_KERNEL);
+               buf = kasprintf(GFP_KERNEL, "wmi/%s", wdriver->driver.name);
                if (!buf) {
                        ret = -ENOMEM;
                        goto probe_string_failure;
                }
-               sprintf(buf, "wmi/%s", wdriver->driver.name);
                wblock->char_dev.minor = MISC_DYNAMIC_MINOR;
                wblock->char_dev.name = buf;
                wblock->char_dev.fops = &wmi_fops;
@@ -1261,11 +1252,9 @@ static void acpi_wmi_notify_handler(acpi_handle handle, u32 event,
 {
        struct guid_block *block;
        struct wmi_block *wblock;
-       struct list_head *p;
        bool found_it = false;
 
-       list_for_each(p, &wmi_block_list) {
-               wblock = list_entry(p, struct wmi_block, list);
+       list_for_each_entry(wblock, &wmi_block_list, list) {
                block = &wblock->gblock;
 
                if (wblock->acpi_device->handle == handle &&
index cfb54e01d758fb240f1cb4f030873c7eba071e8a..9d27016c899ed29e49bb6395bc8686323ea2cd90 100644 (file)
@@ -212,7 +212,6 @@ struct mport_cdev_priv {
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
        struct dma_chan         *dmach;
        struct list_head        async_list;
-       struct list_head        pend_list;
        spinlock_t              req_lock;
        struct mutex            dma_lock;
        struct kref             dma_ref;
@@ -258,8 +257,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mport_cdev_wait);
 static struct class *dev_class;
 static dev_t dev_number;
 
-static struct workqueue_struct *dma_wq;
-
 static void mport_release_mapping(struct kref *ref);
 
 static int rio_mport_maint_rd(struct mport_cdev_priv *priv, void __user *arg,
@@ -539,6 +536,7 @@ static int maint_comptag_set(struct mport_cdev_priv *priv, void __user *arg)
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
 
 struct mport_dma_req {
+       struct kref refcount;
        struct list_head node;
        struct file *filp;
        struct mport_cdev_priv *priv;
@@ -554,11 +552,6 @@ struct mport_dma_req {
        struct completion req_comp;
 };
 
-struct mport_faf_work {
-       struct work_struct work;
-       struct mport_dma_req *req;
-};
-
 static void mport_release_def_dma(struct kref *dma_ref)
 {
        struct mport_dev *md =
@@ -578,8 +571,10 @@ static void mport_release_dma(struct kref *dma_ref)
        complete(&priv->comp);
 }
 
-static void dma_req_free(struct mport_dma_req *req)
+static void dma_req_free(struct kref *ref)
 {
+       struct mport_dma_req *req = container_of(ref, struct mport_dma_req,
+                       refcount);
        struct mport_cdev_priv *priv = req->priv;
        unsigned int i;
 
@@ -611,30 +606,7 @@ static void dma_xfer_callback(void *param)
        req->status = dma_async_is_tx_complete(priv->dmach, req->cookie,
                                               NULL, NULL);
        complete(&req->req_comp);
-}
-
-static void dma_faf_cleanup(struct work_struct *_work)
-{
-       struct mport_faf_work *work = container_of(_work,
-                                               struct mport_faf_work, work);
-       struct mport_dma_req *req = work->req;
-
-       dma_req_free(req);
-       kfree(work);
-}
-
-static void dma_faf_callback(void *param)
-{
-       struct mport_dma_req *req = (struct mport_dma_req *)param;
-       struct mport_faf_work *work;
-
-       work = kmalloc(sizeof(*work), GFP_ATOMIC);
-       if (!work)
-               return;
-
-       INIT_WORK(&work->work, dma_faf_cleanup);
-       work->req = req;
-       queue_work(dma_wq, &work->work);
+       kref_put(&req->refcount, dma_req_free);
 }
 
 /*
@@ -765,16 +737,14 @@ static int do_dma_request(struct mport_dma_req *req,
                goto err_out;
        }
 
-       if (sync == RIO_TRANSFER_FAF)
-               tx->callback = dma_faf_callback;
-       else
-               tx->callback = dma_xfer_callback;
+       tx->callback = dma_xfer_callback;
        tx->callback_param = req;
 
        req->dmach = chan;
        req->sync = sync;
        req->status = DMA_IN_PROGRESS;
        init_completion(&req->req_comp);
+       kref_get(&req->refcount);
 
        cookie = dmaengine_submit(tx);
        req->cookie = cookie;
@@ -785,6 +755,7 @@ static int do_dma_request(struct mport_dma_req *req,
        if (dma_submit_error(cookie)) {
                rmcd_error("submit err=%d (addr:0x%llx len:0x%llx)",
                           cookie, xfer->rio_addr, xfer->length);
+               kref_put(&req->refcount, dma_req_free);
                ret = -EIO;
                goto err_out;
        }
@@ -860,6 +831,8 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode,
        if (!req)
                return -ENOMEM;
 
+       kref_init(&req->refcount);
+
        ret = get_dma_channel(priv);
        if (ret) {
                kfree(req);
@@ -968,42 +941,20 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode,
        ret = do_dma_request(req, xfer, sync, nents);
 
        if (ret >= 0) {
-               if (sync == RIO_TRANSFER_SYNC)
-                       goto sync_out;
-               return ret; /* return ASYNC cookie */
-       }
-
-       if (ret == -ETIMEDOUT || ret == -EINTR) {
-               /*
-                * This can happen only in case of SYNC transfer.
-                * Do not free unfinished request structure immediately.
-                * Place it into pending list and deal with it later
-                */
-               spin_lock(&priv->req_lock);
-               list_add_tail(&req->node, &priv->pend_list);
-               spin_unlock(&priv->req_lock);
-               return ret;
+               if (sync == RIO_TRANSFER_ASYNC)
+                       return ret; /* return ASYNC cookie */
+       } else {
+               rmcd_debug(DMA, "do_dma_request failed with err=%d", ret);
        }
 
-
-       rmcd_debug(DMA, "do_dma_request failed with err=%d", ret);
-sync_out:
-       dma_unmap_sg(chan->device->dev, req->sgt.sgl, req->sgt.nents, dir);
-       sg_free_table(&req->sgt);
 err_pg:
-       if (page_list) {
+       if (!req->page_list) {
                for (i = 0; i < nr_pages; i++)
                        put_page(page_list[i]);
                kfree(page_list);
        }
 err_req:
-       if (req->map) {
-               mutex_lock(&md->buf_mutex);
-               kref_put(&req->map->ref, mport_release_mapping);
-               mutex_unlock(&md->buf_mutex);
-       }
-       put_dma_channel(priv);
-       kfree(req);
+       kref_put(&req->refcount, dma_req_free);
        return ret;
 }
 
@@ -1121,7 +1072,7 @@ static int rio_mport_wait_for_async_dma(struct file *filp, void __user *arg)
                ret = 0;
 
        if (req->status != DMA_IN_PROGRESS && req->status != DMA_PAUSED)
-               dma_req_free(req);
+               kref_put(&req->refcount, dma_req_free);
 
        return ret;
 
@@ -1966,7 +1917,6 @@ static int mport_cdev_open(struct inode *inode, struct file *filp)
 
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
        INIT_LIST_HEAD(&priv->async_list);
-       INIT_LIST_HEAD(&priv->pend_list);
        spin_lock_init(&priv->req_lock);
        mutex_init(&priv->dma_lock);
 #endif
@@ -2006,8 +1956,6 @@ static void mport_cdev_release_dma(struct file *filp)
 
        md = priv->md;
 
-       flush_workqueue(dma_wq);
-
        spin_lock(&priv->req_lock);
        if (!list_empty(&priv->async_list)) {
                rmcd_debug(EXIT, "async list not empty filp=%p %s(%d)",
@@ -2023,20 +1971,7 @@ static void mport_cdev_release_dma(struct file *filp)
                                   req->filp, req->cookie,
                                   completion_done(&req->req_comp)?"yes":"no");
                        list_del(&req->node);
-                       dma_req_free(req);
-               }
-       }
-
-       if (!list_empty(&priv->pend_list)) {
-               rmcd_debug(EXIT, "Free pending DMA requests for filp=%p %s(%d)",
-                          filp, current->comm, task_pid_nr(current));
-               list_for_each_entry_safe(req,
-                                        req_next, &priv->pend_list, node) {
-                       rmcd_debug(EXIT, "free req->filp=%p cookie=%d compl=%s",
-                                  req->filp, req->cookie,
-                                  completion_done(&req->req_comp)?"yes":"no");
-                       list_del(&req->node);
-                       dma_req_free(req);
+                       kref_put(&req->refcount, dma_req_free);
                }
        }
 
@@ -2048,15 +1983,6 @@ static void mport_cdev_release_dma(struct file *filp)
                        current->comm, task_pid_nr(current), wret);
        }
 
-       spin_lock(&priv->req_lock);
-
-       if (!list_empty(&priv->pend_list)) {
-               rmcd_debug(EXIT, "ATTN: pending DMA requests, filp=%p %s(%d)",
-                          filp, current->comm, task_pid_nr(current));
-       }
-
-       spin_unlock(&priv->req_lock);
-
        if (priv->dmach != priv->md->dma_chan) {
                rmcd_debug(EXIT, "Release DMA channel for filp=%p %s(%d)",
                           filp, current->comm, task_pid_nr(current));
@@ -2573,8 +2499,6 @@ static void mport_cdev_remove(struct mport_dev *md)
        cdev_device_del(&md->cdev, &md->dev);
        mport_cdev_kill_fasync(md);
 
-       flush_workqueue(dma_wq);
-
        /* TODO: do we need to give clients some time to close file
         * descriptors? Simple wait for XX, or kref?
         */
@@ -2691,17 +2615,8 @@ static int __init mport_init(void)
                goto err_cli;
        }
 
-       dma_wq = create_singlethread_workqueue("dma_wq");
-       if (!dma_wq) {
-               rmcd_error("failed to create DMA work queue");
-               ret = -ENOMEM;
-               goto err_wq;
-       }
-
        return 0;
 
-err_wq:
-       class_interface_unregister(&rio_mport_interface);
 err_cli:
        unregister_chrdev_region(dev_number, RIO_MAX_MPORTS);
 err_chr:
@@ -2717,7 +2632,6 @@ static void __exit mport_exit(void)
        class_interface_unregister(&rio_mport_interface);
        class_destroy(dev_class);
        unregister_chrdev_region(dev_number, RIO_MAX_MPORTS);
-       destroy_workqueue(dma_wq);
 }
 
 module_init(mport_init);
index 23429bdaca8486a48db86e2e2f28b269bacc9328..161b927d9de1e24b3a034ac3926741312c655456 100644 (file)
@@ -76,7 +76,7 @@ static u16 rio_destid_alloc(struct rio_net *net)
 }
 
 /**
- * rio_destid_reserve - Reserve the specivied destID
+ * rio_destid_reserve - Reserve the specified destID
  * @net: RIO network
  * @destid: destID to reserve
  *
@@ -885,7 +885,7 @@ static struct rio_net *rio_scan_alloc_net(struct rio_mport *mport,
  *
  * For each enumerated device, ensure that each switch in a system
  * has correct routing entries. Add routes for devices that where
- * unknown dirung the first enumeration pass through the switch.
+ * unknown during the first enumeration pass through the switch.
  */
 static void rio_update_route_tables(struct rio_net *net)
 {
@@ -983,7 +983,7 @@ static int rio_enum_mport(struct rio_mport *mport, u32 flags)
                /* reserve mport destID in new net */
                rio_destid_reserve(net, mport->host_deviceid);
 
-               /* Enable Input Output Port (transmitter reviever) */
+               /* Enable Input Output Port (transmitter receiver) */
                rio_enable_rx_tx_port(mport, 1, 0, 0, 0);
 
                /* Set component tag for host */
index b609e1d3654ba65f13d480ce71834fb5b507773b..027274008b086d6f421a3aa0c0ecf5cf2804cb76 100644 (file)
@@ -6,6 +6,7 @@ config REMOTEPROC
        select CRC32
        select FW_LOADER
        select VIRTIO
+       select WANT_DEV_COREDUMP
        help
          Support for remote processors (such as DSP coprocessors). These
          are mainly used on embedded systems.
@@ -90,6 +91,7 @@ config QCOM_ADSP_PIL
        depends on QCOM_SMEM
        depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n)
        depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
+       depends on QCOM_SYSMON || QCOM_SYSMON=n
        select MFD_SYSCON
        select QCOM_MDT_LOADER
        select QCOM_RPROC_COMMON
@@ -107,6 +109,7 @@ config QCOM_Q6V5_PIL
        depends on QCOM_SMEM
        depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n)
        depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
+       depends on QCOM_SYSMON || QCOM_SYSMON=n
        select MFD_SYSCON
        select QCOM_RPROC_COMMON
        select QCOM_SCM
@@ -114,12 +117,28 @@ config QCOM_Q6V5_PIL
 	  Say y here to support the Qualcomm Peripheral Image Loader for the
          Hexagon V5 based remote processors.
 
+config QCOM_SYSMON
+       tristate "Qualcomm sysmon driver"
+       depends on RPMSG
+       depends on ARCH_QCOM
+       depends on NET
+       select QCOM_QMI_HELPERS
+       help
+         The sysmon driver implements a sysmon QMI client and a handler for
+         the sys_mon SMD and GLINK channel, which are used for graceful
+         shutdown, retrieving failure information and propagating information
+         about other subsystems being shut down.
+
+         Say y here if your system runs firmware on any other subsystems, e.g.
+         modem or DSP.
+
 config QCOM_WCNSS_PIL
        tristate "Qualcomm WCNSS Peripheral Image Loader"
        depends on OF && ARCH_QCOM
        depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n)
        depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
        depends on QCOM_SMEM
+       depends on QCOM_SYSMON || QCOM_SYSMON=n
        select QCOM_MDT_LOADER
        select QCOM_RPROC_COMMON
        select QCOM_SCM
index 6e16450ce11f484d53f21e40fae54a0889d113d3..02627ede8d4a4e8a77fb0260d81ba5454faee9a5 100644 (file)
@@ -17,6 +17,7 @@ obj-$(CONFIG_KEYSTONE_REMOTEPROC)     += keystone_remoteproc.o
 obj-$(CONFIG_QCOM_ADSP_PIL)            += qcom_adsp_pil.o
 obj-$(CONFIG_QCOM_RPROC_COMMON)                += qcom_common.o
 obj-$(CONFIG_QCOM_Q6V5_PIL)            += qcom_q6v5_pil.o
+obj-$(CONFIG_QCOM_SYSMON)              += qcom_sysmon.o
 obj-$(CONFIG_QCOM_WCNSS_PIL)           += qcom_wcnss_pil.o
 qcom_wcnss_pil-y                       += qcom_wcnss.o
 qcom_wcnss_pil-y                       += qcom_wcnss_iris.o
index 633268e9d550de7001999052f2692239b4754f6b..54c07fd3f2042efdbf60d6a1feb6d891af0a76ee 100644 (file)
@@ -333,14 +333,14 @@ static int imx_rproc_probe(struct platform_device *pdev)
        /* set some other name then imx */
        rproc = rproc_alloc(dev, "imx-rproc", &imx_rproc_ops,
                            NULL, sizeof(*priv));
-       if (!rproc) {
-               ret = -ENOMEM;
-               goto err;
-       }
+       if (!rproc)
+               return -ENOMEM;
 
        dcfg = of_device_get_match_data(dev);
-       if (!dcfg)
-               return -EINVAL;
+       if (!dcfg) {
+               ret = -EINVAL;
+               goto err_put_rproc;
+       }
 
        priv = rproc->priv;
        priv->rproc = rproc;
@@ -359,8 +359,8 @@ static int imx_rproc_probe(struct platform_device *pdev)
        priv->clk = devm_clk_get(dev, NULL);
        if (IS_ERR(priv->clk)) {
                dev_err(dev, "Failed to get clock\n");
-               rproc_free(rproc);
-               return PTR_ERR(priv->clk);
+               ret = PTR_ERR(priv->clk);
+               goto err_put_rproc;
        }
 
        /*
@@ -370,8 +370,7 @@ static int imx_rproc_probe(struct platform_device *pdev)
        ret = clk_prepare_enable(priv->clk);
        if (ret) {
                dev_err(&rproc->dev, "Failed to enable clock\n");
-               rproc_free(rproc);
-               return ret;
+               goto err_put_rproc;
        }
 
        ret = rproc_add(rproc);
@@ -380,13 +379,13 @@ static int imx_rproc_probe(struct platform_device *pdev)
                goto err_put_clk;
        }
 
-       return ret;
+       return 0;
 
 err_put_clk:
        clk_disable_unprepare(priv->clk);
 err_put_rproc:
        rproc_free(rproc);
-err:
+
        return ret;
 }
 
index 373c167892d7d4754ccef7161f1f3fea1054afc6..89a86ce07f993585c8170e059832d4c97611d365 100644 (file)
@@ -38,7 +38,10 @@ struct adsp_data {
        const char *firmware_name;
        int pas_id;
        bool has_aggre2_clk;
+
        const char *ssr_name;
+       const char *sysmon_name;
+       int ssctl_id;
 };
 
 struct qcom_adsp {
@@ -75,6 +78,7 @@ struct qcom_adsp {
        struct qcom_rproc_glink glink_subdev;
        struct qcom_rproc_subdev smd_subdev;
        struct qcom_rproc_ssr ssr_subdev;
+       struct qcom_sysmon *sysmon;
 };
 
 static int adsp_load(struct rproc *rproc, const struct firmware *fw)
@@ -82,7 +86,9 @@ static int adsp_load(struct rproc *rproc, const struct firmware *fw)
        struct qcom_adsp *adsp = (struct qcom_adsp *)rproc->priv;
 
        return qcom_mdt_load(adsp->dev, fw, rproc->firmware, adsp->pas_id,
-                            adsp->mem_region, adsp->mem_phys, adsp->mem_size);
+                            adsp->mem_region, adsp->mem_phys, adsp->mem_size,
+                            &adsp->mem_reloc);
+
 }
 
 static int adsp_start(struct rproc *rproc)
@@ -177,6 +183,7 @@ static const struct rproc_ops adsp_ops = {
        .start = adsp_start,
        .stop = adsp_stop,
        .da_to_va = adsp_da_to_va,
+       .parse_fw = qcom_register_dump_segments,
        .load = adsp_load,
 };
 
@@ -201,9 +208,6 @@ static irqreturn_t adsp_fatal_interrupt(int irq, void *dev)
 
        rproc_report_crash(adsp->rproc, RPROC_FATAL_ERROR);
 
-       if (!IS_ERR(msg))
-               msg[0] = '\0';
-
        return IRQ_HANDLED;
 }
 
@@ -398,6 +402,9 @@ static int adsp_probe(struct platform_device *pdev)
        qcom_add_glink_subdev(rproc, &adsp->glink_subdev);
        qcom_add_smd_subdev(rproc, &adsp->smd_subdev);
        qcom_add_ssr_subdev(rproc, &adsp->ssr_subdev, desc->ssr_name);
+       adsp->sysmon = qcom_add_sysmon_subdev(rproc,
+                                             desc->sysmon_name,
+                                             desc->ssctl_id);
 
        ret = rproc_add(rproc);
        if (ret)
@@ -419,6 +426,7 @@ static int adsp_remove(struct platform_device *pdev)
        rproc_del(adsp->rproc);
 
        qcom_remove_glink_subdev(adsp->rproc, &adsp->glink_subdev);
+       qcom_remove_sysmon_subdev(adsp->sysmon);
        qcom_remove_smd_subdev(adsp->rproc, &adsp->smd_subdev);
        qcom_remove_ssr_subdev(adsp->rproc, &adsp->ssr_subdev);
        rproc_free(adsp->rproc);
@@ -432,6 +440,8 @@ static const struct adsp_data adsp_resource_init = {
                .pas_id = 1,
                .has_aggre2_clk = false,
                .ssr_name = "lpass",
+               .sysmon_name = "adsp",
+               .ssctl_id = 0x14,
 };
 
 static const struct adsp_data slpi_resource_init = {
@@ -440,6 +450,8 @@ static const struct adsp_data slpi_resource_init = {
                .pas_id = 12,
                .has_aggre2_clk = true,
                .ssr_name = "dsps",
+               .sysmon_name = "slpi",
+               .ssctl_id = 0x16,
 };
 
 static const struct of_device_id adsp_of_match[] = {
index 00602499713f43e3b82d92852c4225ab748fe0ef..acfc99f82fb808fbc3f83462c781eadb7a65bed0 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/remoteproc.h>
 #include <linux/rpmsg/qcom_glink.h>
 #include <linux/rpmsg/qcom_smd.h>
+#include <linux/soc/qcom/mdt_loader.h>
 
 #include "remoteproc_internal.h"
 #include "qcom_common.h"
@@ -41,7 +42,7 @@ static int glink_subdev_probe(struct rproc_subdev *subdev)
        return PTR_ERR_OR_ZERO(glink->edge);
 }
 
-static void glink_subdev_remove(struct rproc_subdev *subdev)
+static void glink_subdev_remove(struct rproc_subdev *subdev, bool crashed)
 {
        struct qcom_rproc_glink *glink = to_glink_subdev(subdev);
 
@@ -74,11 +75,57 @@ EXPORT_SYMBOL_GPL(qcom_add_glink_subdev);
  */
 void qcom_remove_glink_subdev(struct rproc *rproc, struct qcom_rproc_glink *glink)
 {
+       if (!glink->node)
+               return;
+
        rproc_remove_subdev(rproc, &glink->subdev);
        of_node_put(glink->node);
 }
 EXPORT_SYMBOL_GPL(qcom_remove_glink_subdev);
 
/**
 * qcom_register_dump_segments() - register segments for coredump
 * @rproc:	remoteproc handle
 * @fw:		firmware header
 *
 * Register all segments of the ELF in the remoteproc coredump segment list
 *
 * Return: 0 on success, negative errno on failure.
 */
int qcom_register_dump_segments(struct rproc *rproc,
				const struct firmware *fw)
{
	const struct elf32_phdr *phdrs;
	const struct elf32_phdr *phdr;
	const struct elf32_hdr *ehdr;
	int ret;
	int i;

	ehdr = (struct elf32_hdr *)fw->data;
	/* Program headers follow immediately after the ELF header */
	phdrs = (struct elf32_phdr *)(ehdr + 1);

	for (i = 0; i < ehdr->e_phnum; i++) {
		phdr = &phdrs[i];

		/* Only loadable segments occupy remote memory */
		if (phdr->p_type != PT_LOAD)
			continue;

		/* Skip the MDT hash segment; it is metadata, not memory */
		if ((phdr->p_flags & QCOM_MDT_TYPE_MASK) == QCOM_MDT_TYPE_HASH)
			continue;

		/* Nothing to dump for zero-sized segments */
		if (!phdr->p_memsz)
			continue;

		ret = rproc_coredump_add_segment(rproc, phdr->p_paddr,
						 phdr->p_memsz);
		if (ret)
			return ret;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(qcom_register_dump_segments);
+
 static int smd_subdev_probe(struct rproc_subdev *subdev)
 {
        struct qcom_rproc_subdev *smd = to_smd_subdev(subdev);
@@ -88,7 +135,7 @@ static int smd_subdev_probe(struct rproc_subdev *subdev)
        return PTR_ERR_OR_ZERO(smd->edge);
 }
 
-static void smd_subdev_remove(struct rproc_subdev *subdev)
+static void smd_subdev_remove(struct rproc_subdev *subdev, bool crashed)
 {
        struct qcom_rproc_subdev *smd = to_smd_subdev(subdev);
 
@@ -121,6 +168,9 @@ EXPORT_SYMBOL_GPL(qcom_add_smd_subdev);
  */
 void qcom_remove_smd_subdev(struct rproc *rproc, struct qcom_rproc_subdev *smd)
 {
+       if (!smd->node)
+               return;
+
        rproc_remove_subdev(rproc, &smd->subdev);
        of_node_put(smd->node);
 }
@@ -157,7 +207,7 @@ static int ssr_notify_start(struct rproc_subdev *subdev)
        return  0;
 }
 
-static void ssr_notify_stop(struct rproc_subdev *subdev)
+static void ssr_notify_stop(struct rproc_subdev *subdev, bool crashed)
 {
        struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
 
index 728be9834d8b2196eac80865f261acd40ffc1b87..58de71e4781c966b6757ec62e16e182d7cca79ba 100644 (file)
@@ -4,6 +4,9 @@
 
 #include <linux/remoteproc.h>
 #include "remoteproc_internal.h"
+#include <linux/soc/qcom/qmi.h>
+
+struct qcom_sysmon;
 
 struct qcom_rproc_glink {
        struct rproc_subdev subdev;
@@ -30,6 +33,8 @@ struct qcom_rproc_ssr {
 void qcom_add_glink_subdev(struct rproc *rproc, struct qcom_rproc_glink *glink);
 void qcom_remove_glink_subdev(struct rproc *rproc, struct qcom_rproc_glink *glink);
 
+int qcom_register_dump_segments(struct rproc *rproc, const struct firmware *fw);
+
 void qcom_add_smd_subdev(struct rproc *rproc, struct qcom_rproc_subdev *smd);
 void qcom_remove_smd_subdev(struct rproc *rproc, struct qcom_rproc_subdev *smd);
 
@@ -37,4 +42,22 @@ void qcom_add_ssr_subdev(struct rproc *rproc, struct qcom_rproc_ssr *ssr,
                         const char *ssr_name);
 void qcom_remove_ssr_subdev(struct rproc *rproc, struct qcom_rproc_ssr *ssr);
 
+#if IS_ENABLED(CONFIG_QCOM_SYSMON)
+struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
+                                          const char *name,
+                                          int ssctl_instance);
+void qcom_remove_sysmon_subdev(struct qcom_sysmon *sysmon);
+#else
+static inline struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
+                                                        const char *name,
+                                                        int ssctl_instance)
+{
+       return NULL;
+}
+
+static inline void qcom_remove_sysmon_subdev(struct qcom_sysmon *sysmon)
+{
+}
+#endif
+
 #endif
index b4e5e725848d263936d93b7bbe1645dd94e37ce1..8e70a627e0bbc70ba1040a51c5cc71a93b0ddaab 100644 (file)
@@ -168,6 +168,7 @@ struct q6v5 {
 
        struct qcom_rproc_subdev smd_subdev;
        struct qcom_rproc_ssr ssr_subdev;
+       struct qcom_sysmon *sysmon;
        bool need_mem_protection;
        int mpss_perm;
        int mba_perm;
@@ -939,9 +940,6 @@ static irqreturn_t q6v5_wdog_interrupt(int irq, void *dev)
 
        rproc_report_crash(qproc->rproc, RPROC_WATCHDOG);
 
-       if (!IS_ERR(msg))
-               msg[0] = '\0';
-
        return IRQ_HANDLED;
 }
 
@@ -959,9 +957,6 @@ static irqreturn_t q6v5_fatal_interrupt(int irq, void *dev)
 
        rproc_report_crash(qproc->rproc, RPROC_FATAL_ERROR);
 
-       if (!IS_ERR(msg))
-               msg[0] = '\0';
-
        return IRQ_HANDLED;
 }
 
@@ -1215,6 +1210,7 @@ static int q6v5_probe(struct platform_device *pdev)
        qproc->mba_perm = BIT(QCOM_SCM_VMID_HLOS);
        qcom_add_smd_subdev(rproc, &qproc->smd_subdev);
        qcom_add_ssr_subdev(rproc, &qproc->ssr_subdev, "mpss");
+       qproc->sysmon = qcom_add_sysmon_subdev(rproc, "modem", 0x12);
 
        ret = rproc_add(rproc);
        if (ret)
@@ -1234,6 +1230,7 @@ static int q6v5_remove(struct platform_device *pdev)
 
        rproc_del(qproc->rproc);
 
+       qcom_remove_sysmon_subdev(qproc->sysmon);
        qcom_remove_smd_subdev(qproc->rproc, &qproc->smd_subdev);
        qcom_remove_ssr_subdev(qproc->rproc, &qproc->ssr_subdev);
        rproc_free(qproc->rproc);
diff --git a/drivers/remoteproc/qcom_sysmon.c b/drivers/remoteproc/qcom_sysmon.c
new file mode 100644 (file)
index 0000000..f085545
--- /dev/null
@@ -0,0 +1,579 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017, Linaro Ltd.
+ */
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/notifier.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/remoteproc/qcom_rproc.h>
+#include <linux/rpmsg.h>
+
+#include "qcom_common.h"
+
/* Notifier chain used to fan SSR events out to all other sysmon instances */
static BLOCKING_NOTIFIER_HEAD(sysmon_notifiers);
+
/* Per-remoteproc sysmon state; one per qcom_add_sysmon_subdev() call */
struct qcom_sysmon {
	struct rproc_subdev subdev;	/* hooks into the rproc start/stop flow */
	struct rproc *rproc;

	struct list_head node;		/* entry in the global sysmon_list */

	const char *name;		/* SSR identifier of this remote */

	int ssctl_version;		/* 0 while no SSCTL service is registered */
	int ssctl_instance;

	struct notifier_block nb;	/* registered on sysmon_notifiers */

	struct device *dev;

	struct rpmsg_endpoint *ept;	/* sys_mon channel, NULL when closed */
	struct completion comp;		/* completed when the remote responds */
	struct mutex lock;		/* serializes request/ack cycles on @ept */

	bool ssr_ack;			/* last response on @ept was "ssr:ack" */

	struct qmi_handle qmi;
	struct sockaddr_qrtr ssctl;	/* address of the remote SSCTL service */
};
+
/*
 * Global registry of sysmon instances, so sysmon_probe() can match an rpmsg
 * device to its rproc and SSR notifications can reach every remote.
 */
static DEFINE_MUTEX(sysmon_lock);
static LIST_HEAD(sysmon_list);
+
/**
 * sysmon_send_event() - send notification of other remote's SSR event
 * @sysmon:	sysmon context
 * @name:	other remote's name
 *
 * Sends "ssr:<name>:before_shutdown" over the sys_mon rpmsg channel and
 * waits up to five seconds for the remote to answer with "ssr:ack".
 * Failures are logged only; the caller does not depend on the outcome.
 */
static void sysmon_send_event(struct qcom_sysmon *sysmon, const char *name)
{
	char req[50];
	int len;
	int ret;

	/* Silently drop the event if the formatted string would not fit */
	len = snprintf(req, sizeof(req), "ssr:%s:before_shutdown", name);
	if (len >= sizeof(req))
		return;

	/* One request/ack cycle at a time on the endpoint */
	mutex_lock(&sysmon->lock);
	reinit_completion(&sysmon->comp);
	sysmon->ssr_ack = false;

	/* Note: sends @len bytes, i.e. without the NUL terminator */
	ret = rpmsg_send(sysmon->ept, req, len);
	if (ret < 0) {
		dev_err(sysmon->dev, "failed to send sysmon event\n");
		goto out_unlock;
	}

	/* ssr_ack is set by sysmon_callback() before comp is completed */
	ret = wait_for_completion_timeout(&sysmon->comp,
					  msecs_to_jiffies(5000));
	if (!ret) {
		dev_err(sysmon->dev, "timeout waiting for sysmon ack\n");
		goto out_unlock;
	}

	if (!sysmon->ssr_ack)
		dev_err(sysmon->dev, "unexpected response to sysmon event\n");

out_unlock:
	mutex_unlock(&sysmon->lock);
}
+
/**
 * sysmon_request_shutdown() - request graceful shutdown of remote
 * @sysmon:	sysmon context
 *
 * Sends "ssr:shutdown" over the sys_mon rpmsg channel and waits up to five
 * seconds for an "ssr:ack" reply. Failures are logged only; the remote is
 * shut down regardless.
 */
static void sysmon_request_shutdown(struct qcom_sysmon *sysmon)
{
	char *req = "ssr:shutdown";
	int ret;

	/* One request/ack cycle at a time on the endpoint */
	mutex_lock(&sysmon->lock);
	reinit_completion(&sysmon->comp);
	sysmon->ssr_ack = false;

	/* Unlike sysmon_send_event(), the NUL terminator is sent here */
	ret = rpmsg_send(sysmon->ept, req, strlen(req) + 1);
	if (ret < 0) {
		dev_err(sysmon->dev, "send sysmon shutdown request failed\n");
		goto out_unlock;
	}

	ret = wait_for_completion_timeout(&sysmon->comp,
					  msecs_to_jiffies(5000));
	if (!ret) {
		dev_err(sysmon->dev, "timeout waiting for sysmon ack\n");
		goto out_unlock;
	}

	if (!sysmon->ssr_ack)
		dev_err(sysmon->dev,
			"unexpected response to sysmon shutdown request\n");

out_unlock:
	mutex_unlock(&sysmon->lock);
}
+
+static int sysmon_callback(struct rpmsg_device *rpdev, void *data, int count,
+                          void *priv, u32 addr)
+{
+       struct qcom_sysmon *sysmon = priv;
+       const char *ssr_ack = "ssr:ack";
+       const int ssr_ack_len = strlen(ssr_ack) + 1;
+
+       if (!sysmon)
+               return -EINVAL;
+
+       if (count >= ssr_ack_len && !memcmp(data, ssr_ack, ssr_ack_len))
+               sysmon->ssr_ack = true;
+
+       complete(&sysmon->comp);
+
+       return 0;
+}
+
/* SSCTL QMI service message IDs */
#define SSCTL_SHUTDOWN_REQ		0x21
#define SSCTL_SUBSYS_EVENT_REQ		0x23

/* Max encoded length of the QMI messages used here -- TODO confirm vs IDL */
#define SSCTL_MAX_MSG_LEN		7

#define SSCTL_SUBSYS_NAME_LENGTH	15

/* SSR event types, as defined by the remote SSCTL service */
enum {
	SSCTL_SSR_EVENT_BEFORE_POWERUP,
	SSCTL_SSR_EVENT_AFTER_POWERUP,
	SSCTL_SSR_EVENT_BEFORE_SHUTDOWN,
	SSCTL_SSR_EVENT_AFTER_SHUTDOWN,
};

/* How the transition reported in an SSR event was initiated */
enum {
	SSCTL_SSR_EVENT_FORCED,
	SSCTL_SSR_EVENT_GRACEFUL,
};
+
/* Response payload for SSCTL_SHUTDOWN_REQ */
struct ssctl_shutdown_resp {
	struct qmi_response_type_v01 resp;
};

/* QMI wire encoding description for struct ssctl_shutdown_resp */
static struct qmi_elem_info ssctl_shutdown_resp_ei[] = {
	{
		.data_type	= QMI_STRUCT,
		.elem_len	= 1,
		.elem_size	= sizeof(struct qmi_response_type_v01),
		.array_type	= NO_ARRAY,
		.tlv_type	= 0x02,
		.offset		= offsetof(struct ssctl_shutdown_resp, resp),
		.ei_array	= qmi_response_type_v01_ei,
	},
	{}
};
+
/* Request payload for SSCTL_SUBSYS_EVENT_REQ */
struct ssctl_subsys_event_req {
	u8 subsys_name_len;
	char subsys_name[SSCTL_SUBSYS_NAME_LENGTH];
	u32 event;
	u8 evt_driven_valid;	/* optional-TLV presence flag for evt_driven */
	u32 evt_driven;
};

/* QMI wire encoding description for struct ssctl_subsys_event_req */
static struct qmi_elem_info ssctl_subsys_event_req_ei[] = {
	{
		.data_type	= QMI_DATA_LEN,
		.elem_len	= 1,
		.elem_size	= sizeof(uint8_t),
		.array_type	= NO_ARRAY,
		.tlv_type	= 0x01,
		.offset		= offsetof(struct ssctl_subsys_event_req,
					   subsys_name_len),
		.ei_array	= NULL,
	},
	{
		.data_type	= QMI_UNSIGNED_1_BYTE,
		.elem_len	= SSCTL_SUBSYS_NAME_LENGTH,
		.elem_size	= sizeof(char),
		.array_type	= VAR_LEN_ARRAY,
		.tlv_type	= 0x01,
		.offset		= offsetof(struct ssctl_subsys_event_req,
					   subsys_name),
		.ei_array	= NULL,
	},
	{
		.data_type	= QMI_SIGNED_4_BYTE_ENUM,
		.elem_len	= 1,
		.elem_size	= sizeof(uint32_t),
		.array_type	= NO_ARRAY,
		.tlv_type	= 0x02,
		.offset		= offsetof(struct ssctl_subsys_event_req,
					   event),
		.ei_array	= NULL,
	},
	{
		.data_type	= QMI_OPT_FLAG,
		.elem_len	= 1,
		.elem_size	= sizeof(uint8_t),
		.array_type	= NO_ARRAY,
		.tlv_type	= 0x10,
		.offset		= offsetof(struct ssctl_subsys_event_req,
					   evt_driven_valid),
		.ei_array	= NULL,
	},
	{
		.data_type	= QMI_SIGNED_4_BYTE_ENUM,
		.elem_len	= 1,
		.elem_size	= sizeof(uint32_t),
		.array_type	= NO_ARRAY,
		.tlv_type	= 0x10,
		.offset		= offsetof(struct ssctl_subsys_event_req,
					   evt_driven),
		.ei_array	= NULL,
	},
	{}
};
+
/* Response payload for SSCTL_SUBSYS_EVENT_REQ */
struct ssctl_subsys_event_resp {
	struct qmi_response_type_v01 resp;
};

/* QMI wire encoding description for struct ssctl_subsys_event_resp */
static struct qmi_elem_info ssctl_subsys_event_resp_ei[] = {
	{
		.data_type	= QMI_STRUCT,
		.elem_len	= 1,
		.elem_size	= sizeof(struct qmi_response_type_v01),
		.array_type	= NO_ARRAY,
		.tlv_type	= 0x02,
		.offset		= offsetof(struct ssctl_subsys_event_resp,
					   resp),
		.ei_array	= qmi_response_type_v01_ei,
	},
	{}
};
+
+/**
+ * ssctl_request_shutdown() - request shutdown via SSCTL QMI service
+ * @sysmon:    sysmon context
+ */
+static void ssctl_request_shutdown(struct qcom_sysmon *sysmon)
+{
+       struct ssctl_shutdown_resp resp;
+       struct qmi_txn txn;
+       int ret;
+
+       ret = qmi_txn_init(&sysmon->qmi, &txn, ssctl_shutdown_resp_ei, &resp);
+       if (ret < 0) {
+               dev_err(sysmon->dev, "failed to allocate QMI txn\n");
+               return;
+       }
+
+       ret = qmi_send_request(&sysmon->qmi, &sysmon->ssctl, &txn,
+                              SSCTL_SHUTDOWN_REQ, 0, NULL, NULL);
+       if (ret < 0) {
+               dev_err(sysmon->dev, "failed to send shutdown request\n");
+               qmi_txn_cancel(&txn);
+               return;
+       }
+
+       ret = qmi_txn_wait(&txn, 5 * HZ);
+       if (ret < 0)
+               dev_err(sysmon->dev, "failed receiving QMI response\n");
+       else if (resp.resp.result)
+               dev_err(sysmon->dev, "shutdown request failed\n");
+       else
+               dev_dbg(sysmon->dev, "shutdown request completed\n");
+}
+
+/**
+ * ssctl_send_event() - send notification of other remote's SSR event
+ * @sysmon:    sysmon context
+ * @name:      other remote's name
+ */
+static void ssctl_send_event(struct qcom_sysmon *sysmon, const char *name)
+{
+       struct ssctl_subsys_event_resp resp;
+       struct ssctl_subsys_event_req req;
+       struct qmi_txn txn;
+       int ret;
+
+       memset(&resp, 0, sizeof(resp));
+       ret = qmi_txn_init(&sysmon->qmi, &txn, ssctl_subsys_event_resp_ei, &resp);
+       if (ret < 0) {
+               dev_err(sysmon->dev, "failed to allocate QMI txn\n");
+               return;
+       }
+
+       memset(&req, 0, sizeof(req));
+       strlcpy(req.subsys_name, name, sizeof(req.subsys_name));
+       req.subsys_name_len = strlen(req.subsys_name);
+       req.event = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN;
+       req.evt_driven_valid = true;
+       req.evt_driven = SSCTL_SSR_EVENT_FORCED;
+
+       ret = qmi_send_request(&sysmon->qmi, &sysmon->ssctl, &txn,
+                              SSCTL_SUBSYS_EVENT_REQ, 40,
+                              ssctl_subsys_event_req_ei, &req);
+       if (ret < 0) {
+               dev_err(sysmon->dev, "failed to send shutdown request\n");
+               qmi_txn_cancel(&txn);
+               return;
+       }
+
+       ret = qmi_txn_wait(&txn, 5 * HZ);
+       if (ret < 0)
+               dev_err(sysmon->dev, "failed receiving QMI response\n");
+       else if (resp.resp.result)
+               dev_err(sysmon->dev, "ssr event send failed\n");
+       else
+               dev_dbg(sysmon->dev, "ssr event send completed\n");
+}
+
+/**
+ * ssctl_new_server() - QMI callback indicating a new service
+ * @qmi:       QMI handle
+ * @svc:       service information
+ *
+ * Return: 0 if we're interested in this service, -EINVAL otherwise.
+ */
+static int ssctl_new_server(struct qmi_handle *qmi, struct qmi_service *svc)
+{
+       struct qcom_sysmon *sysmon = container_of(qmi, struct qcom_sysmon, qmi);
+
+       switch (svc->version) {
+       case 1:
+               if (svc->instance != 0)
+                       return -EINVAL;
+               if (strcmp(sysmon->name, "modem"))
+                       return -EINVAL;
+               break;
+       case 2:
+               if (svc->instance != sysmon->ssctl_instance)
+                       return -EINVAL;
+               break;
+       default:
+               return -EINVAL;
+       };
+
+       sysmon->ssctl_version = svc->version;
+
+       sysmon->ssctl.sq_family = AF_QIPCRTR;
+       sysmon->ssctl.sq_node = svc->node;
+       sysmon->ssctl.sq_port = svc->port;
+
+       svc->priv = sysmon;
+
+       return 0;
+}
+
+/**
+ * ssctl_del_server() - QMI callback indicating that @svc is removed
+ * @qmi:       QMI handle
+ * @svc:       service information
+ */
+static void ssctl_del_server(struct qmi_handle *qmi, struct qmi_service *svc)
+{
+       struct qcom_sysmon *sysmon = svc->priv;
+
+       sysmon->ssctl_version = 0;
+}
+
/* QMI lookup callbacks: track arrival/departure of the SSCTL service */
static const struct qmi_ops ssctl_ops = {
	.new_server = ssctl_new_server,
	.del_server = ssctl_del_server,
};
+
/* Subdev start hook: nothing to do when the remote boots */
static int sysmon_start(struct rproc_subdev *subdev)
{
	return 0;
}
+
/*
 * Subdev stop hook: notify all other sysmon instances that this remote is
 * going down, then ask the remote itself to shut down gracefully (via the
 * SSCTL QMI service if registered, otherwise the sys_mon rpmsg channel).
 */
static void sysmon_stop(struct rproc_subdev *subdev, bool crashed)
{
	struct qcom_sysmon *sysmon = container_of(subdev, struct qcom_sysmon, subdev);

	blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)sysmon->name);

	/* Don't request graceful shutdown if we've crashed */
	if (crashed)
		return;

	if (sysmon->ssctl_version)
		ssctl_request_shutdown(sysmon);
	else if (sysmon->ept)
		sysmon_request_shutdown(sysmon);
}
+
+/**
+ * sysmon_notify() - notify sysmon target of another's SSR
+ * @nb:                notifier_block associated with sysmon instance
+ * @event:     unused
+ * @data:      SSR identifier of the remote that is going down
+ */
+static int sysmon_notify(struct notifier_block *nb, unsigned long event,
+                        void *data)
+{
+       struct qcom_sysmon *sysmon = container_of(nb, struct qcom_sysmon, nb);
+       struct rproc *rproc = sysmon->rproc;
+       const char *ssr_name = data;
+
+       /* Skip non-running rprocs and the originating instance */
+       if (rproc->state != RPROC_RUNNING || !strcmp(data, sysmon->name)) {
+               dev_dbg(sysmon->dev, "not notifying %s\n", sysmon->name);
+               return NOTIFY_DONE;
+       }
+
+       /* Only SSCTL version 2 supports SSR events */
+       if (sysmon->ssctl_version == 2)
+               ssctl_send_event(sysmon, ssr_name);
+       else if (sysmon->ept)
+               sysmon_send_event(sysmon, ssr_name);
+
+       return NOTIFY_DONE;
+}
+
/**
 * qcom_add_sysmon_subdev() - create a sysmon subdev for the given remoteproc
 * @rproc:	rproc context to associate the subdev with
 * @name:	name of this subdev, to use in SSR
 * @ssctl_instance: instance id of the ssctl QMI service
 *
 * Return: A new qcom_sysmon object, or NULL on failure
 */
struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
					   const char *name,
					   int ssctl_instance)
{
	struct qcom_sysmon *sysmon;
	int ret;

	sysmon = kzalloc(sizeof(*sysmon), GFP_KERNEL);
	if (!sysmon)
		return NULL;

	sysmon->dev = rproc->dev.parent;
	sysmon->rproc = rproc;

	/* @name is kept by reference; caller must keep it alive */
	sysmon->name = name;
	sysmon->ssctl_instance = ssctl_instance;

	init_completion(&sysmon->comp);
	mutex_init(&sysmon->lock);

	ret = qmi_handle_init(&sysmon->qmi, SSCTL_MAX_MSG_LEN, &ssctl_ops, NULL);
	if (ret < 0) {
		dev_err(sysmon->dev, "failed to initialize qmi handle\n");
		kfree(sysmon);
		return NULL;
	}

	/*
	 * 43 is presumably the SSCTL QMI service id -- confirm against the
	 * QMI service registry. Lookup failure is deliberately ignored: it
	 * just leaves ssctl_version at 0, falling back to the rpmsg channel.
	 */
	qmi_add_lookup(&sysmon->qmi, 43, 0, 0);

	rproc_add_subdev(rproc, &sysmon->subdev, sysmon_start, sysmon_stop);

	sysmon->nb.notifier_call = sysmon_notify;
	blocking_notifier_chain_register(&sysmon_notifiers, &sysmon->nb);

	/* Publish on the global list so sysmon_probe() can find us */
	mutex_lock(&sysmon_lock);
	list_add(&sysmon->node, &sysmon_list);
	mutex_unlock(&sysmon_lock);

	return sysmon;
}
EXPORT_SYMBOL_GPL(qcom_add_sysmon_subdev);
+
/**
 * qcom_remove_sysmon_subdev() - release a qcom_sysmon
 * @sysmon:	sysmon context, as retrieved by qcom_add_sysmon_subdev()
 */
void qcom_remove_sysmon_subdev(struct qcom_sysmon *sysmon)
{
	/* NULL is a no-op, matching the !CONFIG_QCOM_SYSMON stub */
	if (!sysmon)
		return;

	/* Unpublish first so sysmon_probe() can no longer find us */
	mutex_lock(&sysmon_lock);
	list_del(&sysmon->node);
	mutex_unlock(&sysmon_lock);

	blocking_notifier_chain_unregister(&sysmon_notifiers, &sysmon->nb);

	rproc_remove_subdev(sysmon->rproc, &sysmon->subdev);

	qmi_handle_release(&sysmon->qmi);

	kfree(sysmon);
}
EXPORT_SYMBOL_GPL(qcom_remove_sysmon_subdev);
+
+/**
+ * sysmon_probe() - probe sys_mon channel
+ * @rpdev:     rpmsg device handle
+ *
+ * Find the sysmon context associated with the ancestor remoteproc and assign
+ * this rpmsg device with said sysmon context.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+static int sysmon_probe(struct rpmsg_device *rpdev)
+{
+       struct qcom_sysmon *sysmon;
+       struct rproc *rproc;
+
+       rproc = rproc_get_by_child(&rpdev->dev);
+       if (!rproc) {
+               dev_err(&rpdev->dev, "sysmon device not child of rproc\n");
+               return -EINVAL;
+       }
+
+       mutex_lock(&sysmon_lock);
+       list_for_each_entry(sysmon, &sysmon_list, node) {
+               if (sysmon->rproc == rproc)
+                       goto found;
+       }
+       mutex_unlock(&sysmon_lock);
+
+       dev_err(&rpdev->dev, "no sysmon associated with parent rproc\n");
+
+       return -EINVAL;
+
+found:
+       mutex_unlock(&sysmon_lock);
+
+       rpdev->ept->priv = sysmon;
+       sysmon->ept = rpdev->ept;
+
+       return 0;
+}
+
+/**
+ * sysmon_remove() - sys_mon channel remove handler
+ * @rpdev:     rpmsg device handle
+ *
+ * Disassociate the rpmsg device with the sysmon instance.
+ */
+static void sysmon_remove(struct rpmsg_device *rpdev)
+{
+       struct qcom_sysmon *sysmon = rpdev->ept->priv;
+
+       sysmon->ept = NULL;
+}
+
/* Bind to the "sys_mon" SMD/GLINK channel exposed by the remote */
static const struct rpmsg_device_id sysmon_match[] = {
	{ "sys_mon" },
	{}
};

static struct rpmsg_driver sysmon_driver = {
	.probe = sysmon_probe,
	.remove = sysmon_remove,
	.callback = sysmon_callback,
	.id_table = sysmon_match,
	.drv = {
		.name = "qcom_sysmon",
	},
};

module_rpmsg_driver(sysmon_driver);

MODULE_DESCRIPTION("Qualcomm sysmon driver");
MODULE_LICENSE("GPL v2");
index 3f0609236a769c995f75a411c37b638177254400..b0e07e9f42d5698e64baa7897d75460c28a89690 100644 (file)
@@ -40,6 +40,7 @@
 #define WCNSS_CRASH_REASON_SMEM                422
 #define WCNSS_FIRMWARE_NAME            "wcnss.mdt"
 #define WCNSS_PAS_ID                   6
+#define WCNSS_SSCTL_ID                 0x13
 
 #define WCNSS_SPARE_NVBIN_DLND         BIT(25)
 
@@ -98,6 +99,7 @@ struct qcom_wcnss {
        size_t mem_size;
 
        struct qcom_rproc_subdev smd_subdev;
+       struct qcom_sysmon *sysmon;
 };
 
 static const struct wcnss_data riva_data = {
@@ -153,7 +155,8 @@ static int wcnss_load(struct rproc *rproc, const struct firmware *fw)
        struct qcom_wcnss *wcnss = (struct qcom_wcnss *)rproc->priv;
 
        return qcom_mdt_load(wcnss->dev, fw, rproc->firmware, WCNSS_PAS_ID,
-                            wcnss->mem_region, wcnss->mem_phys, wcnss->mem_size);
+                            wcnss->mem_region, wcnss->mem_phys,
+                            wcnss->mem_size, &wcnss->mem_reloc);
 }
 
 static void wcnss_indicate_nv_download(struct qcom_wcnss *wcnss)
@@ -308,6 +311,7 @@ static const struct rproc_ops wcnss_ops = {
        .start = wcnss_start,
        .stop = wcnss_stop,
        .da_to_va = wcnss_da_to_va,
+       .parse_fw = qcom_register_dump_segments,
        .load = wcnss_load,
 };
 
@@ -332,9 +336,6 @@ static irqreturn_t wcnss_fatal_interrupt(int irq, void *dev)
 
        rproc_report_crash(wcnss->rproc, RPROC_FATAL_ERROR);
 
-       if (!IS_ERR(msg))
-               msg[0] = '\0';
-
        return IRQ_HANDLED;
 }
 
@@ -551,6 +552,7 @@ static int wcnss_probe(struct platform_device *pdev)
        }
 
        qcom_add_smd_subdev(rproc, &wcnss->smd_subdev);
+       wcnss->sysmon = qcom_add_sysmon_subdev(rproc, "wcnss", WCNSS_SSCTL_ID);
 
        ret = rproc_add(rproc);
        if (ret)
@@ -573,6 +575,7 @@ static int wcnss_remove(struct platform_device *pdev)
        qcom_smem_state_put(wcnss->state);
        rproc_del(wcnss->rproc);
 
+       qcom_remove_sysmon_subdev(wcnss->sysmon);
        qcom_remove_smd_subdev(wcnss->rproc, &wcnss->smd_subdev);
        rproc_free(wcnss->rproc);
 
index 4170dfbd93bdba5e9fa6a57e5c68ec9d95a9a274..6d9c5832ce47d3052568f10794652686237c00e8 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/firmware.h>
 #include <linux/string.h>
 #include <linux/debugfs.h>
+#include <linux/devcoredump.h>
 #include <linux/remoteproc.h>
 #include <linux/iommu.h>
 #include <linux/idr.h>
@@ -307,7 +308,7 @@ static int rproc_vdev_do_probe(struct rproc_subdev *subdev)
        return rproc_add_virtio_dev(rvdev, rvdev->id);
 }
 
-static void rproc_vdev_do_remove(struct rproc_subdev *subdev)
+static void rproc_vdev_do_remove(struct rproc_subdev *subdev, bool crashed)
 {
        struct rproc_vdev *rvdev = container_of(subdev, struct rproc_vdev, subdev);
 
@@ -788,17 +789,31 @@ static int rproc_probe_subdevices(struct rproc *rproc)
 
 unroll_registration:
        list_for_each_entry_continue_reverse(subdev, &rproc->subdevs, node)
-               subdev->remove(subdev);
+               subdev->remove(subdev, true);
 
        return ret;
 }
 
-static void rproc_remove_subdevices(struct rproc *rproc)
+static void rproc_remove_subdevices(struct rproc *rproc, bool crashed)
 {
        struct rproc_subdev *subdev;
 
        list_for_each_entry_reverse(subdev, &rproc->subdevs, node)
-               subdev->remove(subdev);
+               subdev->remove(subdev, crashed);
+}
+
+/**
+ * rproc_coredump_cleanup() - clean up dump_segments list
+ * @rproc: the remote processor handle
+ */
+static void rproc_coredump_cleanup(struct rproc *rproc)
+{
+       struct rproc_dump_segment *entry, *tmp;
+
+       list_for_each_entry_safe(entry, tmp, &rproc->dump_segments, node) {
+               list_del(&entry->node);
+               kfree(entry);
+       }
 }
 
 /**
@@ -848,6 +863,8 @@ static void rproc_resource_cleanup(struct rproc *rproc)
        /* clean up remote vdev entries */
        list_for_each_entry_safe(rvdev, rvtmp, &rproc->rvdevs, node)
                kref_put(&rvdev->refcount, rproc_vdev_release);
+
+       rproc_coredump_cleanup(rproc);
 }
 
 static int rproc_start(struct rproc *rproc, const struct firmware *fw)
@@ -927,8 +944,8 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw)
 
        rproc->bootaddr = rproc_get_boot_addr(rproc, fw);
 
-       /* load resource table */
-       ret = rproc_load_rsc_table(rproc, fw);
+       /* Load resource table, core dump segment list etc from the firmware */
+       ret = rproc_parse_fw(rproc, fw);
        if (ret)
                goto disable_iommu;
 
@@ -992,13 +1009,13 @@ static int rproc_trigger_auto_boot(struct rproc *rproc)
        return ret;
 }
 
-static int rproc_stop(struct rproc *rproc)
+static int rproc_stop(struct rproc *rproc, bool crashed)
 {
        struct device *dev = &rproc->dev;
        int ret;
 
        /* remove any subdevices for the remote processor */
-       rproc_remove_subdevices(rproc);
+       rproc_remove_subdevices(rproc, crashed);
 
        /* the installed resource table is no longer accessible */
        rproc->table_ptr = rproc->cached_table;
@@ -1017,6 +1034,113 @@ static int rproc_stop(struct rproc *rproc)
        return 0;
 }
 
+/**
+ * rproc_coredump_add_segment() - add segment of device memory to coredump
+ * @rproc:     handle of a remote processor
+ * @da:                device address
+ * @size:      size of segment
+ *
+ * Add device memory to the list of segments to be included in a coredump for
+ * the remoteproc.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size)
+{
+       struct rproc_dump_segment *segment;
+
+       segment = kzalloc(sizeof(*segment), GFP_KERNEL);
+       if (!segment)
+               return -ENOMEM;
+
+       segment->da = da;
+       segment->size = size;
+
+       list_add_tail(&segment->node, &rproc->dump_segments);
+
+       return 0;
+}
+EXPORT_SYMBOL(rproc_coredump_add_segment);
+
+/**
+ * rproc_coredump() - perform coredump
+ * @rproc:     rproc handle
+ *
+ * This function will generate an ELF header for the registered segments
+ * and create a devcoredump device associated with rproc.
+ */
+static void rproc_coredump(struct rproc *rproc)
+{
+       struct rproc_dump_segment *segment;
+       struct elf32_phdr *phdr;
+       struct elf32_hdr *ehdr;
+       size_t data_size;
+       size_t offset;
+       void *data;
+       void *ptr;
+       int phnum = 0;
+
+       if (list_empty(&rproc->dump_segments))
+               return;
+
+       data_size = sizeof(*ehdr);
+       list_for_each_entry(segment, &rproc->dump_segments, node) {
+               data_size += sizeof(*phdr) + segment->size;
+
+               phnum++;
+       }
+
+       data = vmalloc(data_size);
+       if (!data)
+               return;
+
+       ehdr = data;
+
+       memset(ehdr, 0, sizeof(*ehdr));
+       memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
+       ehdr->e_ident[EI_CLASS] = ELFCLASS32;
+       ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
+       ehdr->e_ident[EI_VERSION] = EV_CURRENT;
+       ehdr->e_ident[EI_OSABI] = ELFOSABI_NONE;
+       ehdr->e_type = ET_CORE;
+       ehdr->e_machine = EM_NONE;
+       ehdr->e_version = EV_CURRENT;
+       ehdr->e_entry = rproc->bootaddr;
+       ehdr->e_phoff = sizeof(*ehdr);
+       ehdr->e_ehsize = sizeof(*ehdr);
+       ehdr->e_phentsize = sizeof(*phdr);
+       ehdr->e_phnum = phnum;
+
+       phdr = data + ehdr->e_phoff;
+       offset = ehdr->e_phoff + sizeof(*phdr) * ehdr->e_phnum;
+       list_for_each_entry(segment, &rproc->dump_segments, node) {
+               memset(phdr, 0, sizeof(*phdr));
+               phdr->p_type = PT_LOAD;
+               phdr->p_offset = offset;
+               phdr->p_vaddr = segment->da;
+               phdr->p_paddr = segment->da;
+               phdr->p_filesz = segment->size;
+               phdr->p_memsz = segment->size;
+               phdr->p_flags = PF_R | PF_W | PF_X;
+               phdr->p_align = 0;
+
+               ptr = rproc_da_to_va(rproc, segment->da, segment->size);
+               if (!ptr) {
+                       dev_err(&rproc->dev,
+                               "invalid coredump segment (%pad, %zu)\n",
+                               &segment->da, segment->size);
+                       memset(data + offset, 0xff, segment->size);
+               } else {
+                       memcpy(data + offset, ptr, segment->size);
+               }
+
+               offset += phdr->p_filesz;
+               phdr++;
+       }
+
+       dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
+}
+
 /**
  * rproc_trigger_recovery() - recover a remoteproc
  * @rproc: the remote processor
@@ -1039,10 +1163,13 @@ int rproc_trigger_recovery(struct rproc *rproc)
        if (ret)
                return ret;
 
-       ret = rproc_stop(rproc);
+       ret = rproc_stop(rproc, false);
        if (ret)
                goto unlock_mutex;
 
+       /* generate coredump */
+       rproc_coredump(rproc);
+
        /* load firmware */
        ret = request_firmware(&firmware_p, rproc->firmware, dev);
        if (ret < 0) {
@@ -1189,7 +1316,7 @@ void rproc_shutdown(struct rproc *rproc)
        if (!atomic_dec_and_test(&rproc->power))
                goto out;
 
-       ret = rproc_stop(rproc);
+       ret = rproc_stop(rproc, true);
        if (ret) {
                atomic_inc(&rproc->power);
                goto out;
@@ -1428,7 +1555,7 @@ struct rproc *rproc_alloc(struct device *dev, const char *name,
        /* Default to ELF loader if no load function is specified */
        if (!rproc->ops->load) {
                rproc->ops->load = rproc_elf_load_segments;
-               rproc->ops->load_rsc_table = rproc_elf_load_rsc_table;
+               rproc->ops->parse_fw = rproc_elf_load_rsc_table;
                rproc->ops->find_loaded_rsc_table = rproc_elf_find_loaded_rsc_table;
                rproc->ops->sanity_check = rproc_elf_sanity_check;
                rproc->ops->get_boot_addr = rproc_elf_get_boot_addr;
@@ -1443,6 +1570,7 @@ struct rproc *rproc_alloc(struct device *dev, const char *name,
        INIT_LIST_HEAD(&rproc->traces);
        INIT_LIST_HEAD(&rproc->rvdevs);
        INIT_LIST_HEAD(&rproc->subdevs);
+       INIT_LIST_HEAD(&rproc->dump_segments);
 
        INIT_WORK(&rproc->crash_handler, rproc_crash_handler_work);
 
@@ -1535,7 +1663,7 @@ EXPORT_SYMBOL(rproc_del);
 void rproc_add_subdev(struct rproc *rproc,
                      struct rproc_subdev *subdev,
                      int (*probe)(struct rproc_subdev *subdev),
-                     void (*remove)(struct rproc_subdev *subdev))
+                     void (*remove)(struct rproc_subdev *subdev, bool crashed))
 {
        subdev->probe = probe;
        subdev->remove = remove;
index 55a2950c5cb734eeb16ca5326d08d45d44a7234f..7570beb035b5f462f9c9ae19ef9ed1ceff6a6424 100644 (file)
@@ -88,11 +88,10 @@ int rproc_load_segments(struct rproc *rproc, const struct firmware *fw)
        return -EINVAL;
 }
 
-static inline int rproc_load_rsc_table(struct rproc *rproc,
-                                      const struct firmware *fw)
+static inline int rproc_parse_fw(struct rproc *rproc, const struct firmware *fw)
 {
-       if (rproc->ops->load_rsc_table)
-               return rproc->ops->load_rsc_table(rproc, fw);
+       if (rproc->ops->parse_fw)
+               return rproc->ops->parse_fw(rproc, fw);
 
        return 0;
 }
index e0f31ed096a50223bfa04b960fb0bbdd45e72434..768ef542a841161ce0276c2a181067c3ed12ba28 100644 (file)
@@ -113,7 +113,7 @@ struct qcom_glink {
        spinlock_t rx_lock;
        struct list_head rx_queue;
 
-       struct mutex tx_lock;
+       spinlock_t tx_lock;
 
        spinlock_t idr_lock;
        struct idr lcids;
@@ -288,15 +288,14 @@ static int qcom_glink_tx(struct qcom_glink *glink,
                         const void *data, size_t dlen, bool wait)
 {
        unsigned int tlen = hlen + dlen;
-       int ret;
+       unsigned long flags;
+       int ret = 0;
 
        /* Reject packets that are too big */
        if (tlen >= glink->tx_pipe->length)
                return -EINVAL;
 
-       ret = mutex_lock_interruptible(&glink->tx_lock);
-       if (ret)
-               return ret;
+       spin_lock_irqsave(&glink->tx_lock, flags);
 
        while (qcom_glink_tx_avail(glink) < tlen) {
                if (!wait) {
@@ -304,7 +303,12 @@ static int qcom_glink_tx(struct qcom_glink *glink,
                        goto out;
                }
 
+               /* Wait without holding the tx_lock */
+               spin_unlock_irqrestore(&glink->tx_lock, flags);
+
                usleep_range(10000, 15000);
+
+               spin_lock_irqsave(&glink->tx_lock, flags);
        }
 
        qcom_glink_tx_write(glink, hdr, hlen, data, dlen);
@@ -313,7 +317,7 @@ static int qcom_glink_tx(struct qcom_glink *glink,
        mbox_client_txdone(glink->mbox_chan, 0);
 
 out:
-       mutex_unlock(&glink->tx_lock);
+       spin_unlock_irqrestore(&glink->tx_lock, flags);
 
        return ret;
 }
@@ -1567,7 +1571,7 @@ struct qcom_glink *qcom_glink_native_probe(struct device *dev,
        glink->features = features;
        glink->intentless = intentless;
 
-       mutex_init(&glink->tx_lock);
+       spin_lock_init(&glink->tx_lock);
        spin_lock_init(&glink->rx_lock);
        INIT_LIST_HEAD(&glink->rx_queue);
        INIT_WORK(&glink->rx_work, qcom_glink_work);
index 892f2b92a4d854d5122887f21591184e18973b0f..3fa9d43e2c879c82a12c7866737fa5b3d92909c6 100644 (file)
@@ -217,6 +217,7 @@ struct qcom_glink *qcom_glink_smem_register(struct device *parent,
        ret = device_register(dev);
        if (ret) {
                pr_err("failed to register glink edge\n");
+               put_device(dev);
                return ERR_PTR(ret);
        }
 
@@ -299,7 +300,7 @@ struct qcom_glink *qcom_glink_smem_register(struct device *parent,
        return glink;
 
 err_put_dev:
-       put_device(dev);
+       device_unregister(dev);
 
        return ERR_PTR(ret);
 }
index 92d0c6a7a837248252e112e1157d674aa6506f47..5ce9bf7b897d82da403c489ed69abd3ec5de41d5 100644 (file)
@@ -167,9 +167,9 @@ struct qcom_smd_endpoint {
        struct qcom_smd_channel *qsch;
 };
 
-#define to_smd_device(_rpdev)  container_of(_rpdev, struct qcom_smd_device, rpdev)
+#define to_smd_device(r)       container_of(r, struct qcom_smd_device, rpdev)
 #define to_smd_edge(d)         container_of(d, struct qcom_smd_edge, dev)
-#define to_smd_endpoint(ept)   container_of(ept, struct qcom_smd_endpoint, ept)
+#define to_smd_endpoint(e)     container_of(e, struct qcom_smd_endpoint, ept)
 
 /**
  * struct qcom_smd_channel - smd channel struct
@@ -205,7 +205,7 @@ struct qcom_smd_channel {
        struct smd_channel_info_pair *info;
        struct smd_channel_info_word_pair *info_word;
 
-       struct mutex tx_lock;
+       spinlock_t tx_lock;
        wait_queue_head_t fblockread_event;
 
        void *tx_fifo;
@@ -729,6 +729,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data,
 {
        __le32 hdr[5] = { cpu_to_le32(len), };
        int tlen = sizeof(hdr) + len;
+       unsigned long flags;
        int ret;
 
        /* Word aligned channels only accept word size aligned data */
@@ -739,9 +740,11 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data,
        if (tlen >= channel->fifo_size)
                return -EINVAL;
 
-       ret = mutex_lock_interruptible(&channel->tx_lock);
-       if (ret)
-               return ret;
+       /* Highlight the fact that if we enter the loop below we might sleep */
+       if (wait)
+               might_sleep();
+
+       spin_lock_irqsave(&channel->tx_lock, flags);
 
        while (qcom_smd_get_tx_avail(channel) < tlen &&
               channel->state == SMD_CHANNEL_OPENED) {
@@ -753,7 +756,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data,
                SET_TX_CHANNEL_FLAG(channel, fBLOCKREADINTR, 0);
 
                /* Wait without holding the tx_lock */
-               mutex_unlock(&channel->tx_lock);
+               spin_unlock_irqrestore(&channel->tx_lock, flags);
 
                ret = wait_event_interruptible(channel->fblockread_event,
                                       qcom_smd_get_tx_avail(channel) >= tlen ||
@@ -761,9 +764,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data,
                if (ret)
                        return ret;
 
-               ret = mutex_lock_interruptible(&channel->tx_lock);
-               if (ret)
-                       return ret;
+               spin_lock_irqsave(&channel->tx_lock, flags);
 
                SET_TX_CHANNEL_FLAG(channel, fBLOCKREADINTR, 1);
        }
@@ -787,7 +788,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data,
        qcom_smd_signal_channel(channel);
 
 out_unlock:
-       mutex_unlock(&channel->tx_lock);
+       spin_unlock_irqrestore(&channel->tx_lock, flags);
 
        return ret;
 }
@@ -996,8 +997,26 @@ static struct device_node *qcom_smd_match_channel(struct device_node *edge_node,
        return NULL;
 }
 
+static int qcom_smd_announce_create(struct rpmsg_device *rpdev)
+{
+       struct qcom_smd_endpoint *qept = to_smd_endpoint(rpdev->ept);
+       struct qcom_smd_channel *channel = qept->qsch;
+       unsigned long flags;
+       bool kick_state;
+
+       spin_lock_irqsave(&channel->recv_lock, flags);
+       kick_state = qcom_smd_channel_intr(channel);
+       spin_unlock_irqrestore(&channel->recv_lock, flags);
+
+       if (kick_state)
+               schedule_work(&channel->edge->state_work);
+
+       return 0;
+}
+
 static const struct rpmsg_device_ops qcom_smd_device_ops = {
        .create_ept = qcom_smd_create_ept,
+       .announce_create = qcom_smd_announce_create,
 };
 
 static const struct rpmsg_endpoint_ops qcom_smd_endpoint_ops = {
@@ -1090,7 +1109,7 @@ static struct qcom_smd_channel *qcom_smd_create_channel(struct qcom_smd_edge *ed
        if (!channel->name)
                return ERR_PTR(-ENOMEM);
 
-       mutex_init(&channel->tx_lock);
+       spin_lock_init(&channel->tx_lock);
        spin_lock_init(&channel->recv_lock);
        init_waitqueue_head(&channel->fblockread_event);
        init_waitqueue_head(&channel->state_change_event);
@@ -1234,6 +1253,11 @@ static void qcom_channel_state_worker(struct work_struct *work)
                if (channel->state != SMD_CHANNEL_CLOSED)
                        continue;
 
+               remote_state = GET_RX_CHANNEL_INFO(channel, state);
+               if (remote_state != SMD_CHANNEL_OPENING &&
+                   remote_state != SMD_CHANNEL_OPENED)
+                       continue;
+
                if (channel->registered)
                        continue;
 
@@ -1408,6 +1432,7 @@ struct qcom_smd_edge *qcom_smd_register_edge(struct device *parent,
        ret = device_register(&edge->dev);
        if (ret) {
                pr_err("failed to register smd edge\n");
+               put_device(&edge->dev);
                return ERR_PTR(ret);
        }
 
@@ -1428,7 +1453,7 @@ struct qcom_smd_edge *qcom_smd_register_edge(struct device *parent,
        return edge;
 
 unregister_dev:
-       put_device(&edge->dev);
+       device_unregister(&edge->dev);
        return ERR_PTR(ret);
 }
 EXPORT_SYMBOL(qcom_smd_register_edge);
index 5a081762afcc5af07cef291edc278a5bc91ba9ef..920a02f0462c45f520997769890351c89fcc1242 100644 (file)
@@ -442,7 +442,7 @@ static int rpmsg_dev_probe(struct device *dev)
                goto out;
        }
 
-       if (rpdev->ops->announce_create)
+       if (ept && rpdev->ops->announce_create)
                err = rpdev->ops->announce_create(rpdev);
 out:
        return err;
index 319e3c8976d56055cf69b66287ea669767b8caf0..59e6dede3db36ad0f9891cd1a290f32702709d7f 100644 (file)
@@ -407,6 +407,16 @@ config RTC_DRV_ISL12022
          This driver can also be built as a module. If so, the module
          will be called rtc-isl12022.
 
+config RTC_DRV_ISL12026
+       tristate "Intersil ISL12026"
+       depends on OF || COMPILE_TEST
+       help
+         If you say yes here you get support for the
+         Intersil ISL12026 RTC chip.
+
+         This driver can also be built as a module. If so, the module
+         will be called rtc-isl12026.
+
 config RTC_DRV_X1205
        tristate "Xicor/Intersil X1205"
        help
@@ -1413,6 +1423,7 @@ config RTC_DRV_AT91RM9200
 config RTC_DRV_AT91SAM9
        tristate "AT91SAM9 RTT as RTC"
        depends on ARCH_AT91 || COMPILE_TEST
+       depends on HAS_IOMEM
        select MFD_SYSCON
        help
          Some AT91SAM9 SoCs provide an RTT (Real Time Timer) block which
@@ -1502,7 +1513,7 @@ config RTC_DRV_STARFIRE
 
 config RTC_DRV_TX4939
        tristate "TX4939 SoC"
-       depends on SOC_TX4939
+       depends on SOC_TX4939 || COMPILE_TEST
        help
          Driver for the internal RTC (Realtime Clock) module found on
          Toshiba TX4939 SoC.
index ee0206becd9fc36f31ed28c8f884089cc5b2ed5f..5ff2fc0c361a84bcabe19642c9232b42cf0c25dd 100644 (file)
@@ -75,6 +75,7 @@ obj-$(CONFIG_RTC_DRV_HID_SENSOR_TIME) += rtc-hid-sensor-time.o
 obj-$(CONFIG_RTC_DRV_HYM8563)  += rtc-hym8563.o
 obj-$(CONFIG_RTC_DRV_IMXDI)    += rtc-imxdi.o
 obj-$(CONFIG_RTC_DRV_ISL12022) += rtc-isl12022.o
+obj-$(CONFIG_RTC_DRV_ISL12026) += rtc-isl12026.o
 obj-$(CONFIG_RTC_DRV_ISL1208)  += rtc-isl1208.o
 obj-$(CONFIG_RTC_DRV_JZ4740)   += rtc-jz4740.o
 obj-$(CONFIG_RTC_DRV_LP8788)   += rtc-lp8788.o
index 722d683e0b0f5fd3f024d1da7b65ac61e42320b0..d37588f080556d16af315862b99370ef4a948ea5 100644 (file)
@@ -211,6 +211,73 @@ static int rtc_device_get_id(struct device *dev)
        return id;
 }
 
+static void rtc_device_get_offset(struct rtc_device *rtc)
+{
+       time64_t range_secs;
+       u32 start_year;
+       int ret;
+
+       /*
+        * If RTC driver did not implement the range of RTC hardware device,
+        * then we can not expand the RTC range by adding or subtracting one
+        * offset.
+        */
+       if (rtc->range_min == rtc->range_max)
+               return;
+
+       ret = device_property_read_u32(rtc->dev.parent, "start-year",
+                                      &start_year);
+       if (!ret) {
+               rtc->start_secs = mktime64(start_year, 1, 1, 0, 0, 0);
+               rtc->set_start_time = true;
+       }
+
+       /*
+        * If user did not implement the start time for RTC driver, then no
+        * need to expand the RTC range.
+        */
+       if (!rtc->set_start_time)
+               return;
+
+       range_secs = rtc->range_max - rtc->range_min + 1;
+
+       /*
+        * If the start_secs is larger than the maximum seconds (rtc->range_max)
+        * supported by RTC hardware or the maximum seconds of new expanded
+        * range (start_secs + rtc->range_max - rtc->range_min) is less than
+        * rtc->range_min, which means the minimum seconds (rtc->range_min) of
+        * RTC hardware will be mapped to start_secs by adding one offset, so
+        * the offset seconds calculation formula should be:
+        * rtc->offset_secs = rtc->start_secs - rtc->range_min;
+        *
+        * If the start_secs is larger than the minimum seconds (rtc->range_min)
+        * supported by RTC hardware, then there is one region is overlapped
+        * between the original RTC hardware range and the new expanded range,
+        * and this overlapped region do not need to be mapped into the new
+        * expanded range due to it is valid for RTC device. So the minimum
+        * seconds of RTC hardware (rtc->range_min) should be mapped to
+        * rtc->range_max + 1, then the offset seconds formula should be:
+        * rtc->offset_secs = rtc->range_max - rtc->range_min + 1;
+        *
+        * If the start_secs is less than the minimum seconds (rtc->range_min),
+        * which is similar to case 2. So the start_secs should be mapped to
+        * start_secs + rtc->range_max - rtc->range_min + 1, then the
+        * offset seconds formula should be:
+        * rtc->offset_secs = -(rtc->range_max - rtc->range_min + 1);
+        *
+        * Otherwise the offset seconds should be 0.
+        */
+       if (rtc->start_secs > rtc->range_max ||
+           rtc->start_secs + range_secs - 1 < rtc->range_min)
+               rtc->offset_secs = rtc->start_secs - rtc->range_min;
+       else if (rtc->start_secs > rtc->range_min)
+               rtc->offset_secs = range_secs;
+       else if (rtc->start_secs < rtc->range_min)
+               rtc->offset_secs = -range_secs;
+       else
+               rtc->offset_secs = 0;
+}
+
 /**
  * rtc_device_register - register w/ RTC class
  * @dev: the device to register
@@ -247,6 +314,8 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
 
        dev_set_name(&rtc->dev, "rtc%d", id);
 
+       rtc_device_get_offset(rtc);
+
        /* Check to see if there is an ALARM already set in hw */
        err = __rtc_read_alarm(rtc, &alrm);
 
@@ -293,8 +362,6 @@ EXPORT_SYMBOL_GPL(rtc_device_register);
  */
 void rtc_device_unregister(struct rtc_device *rtc)
 {
-       rtc_nvmem_unregister(rtc);
-
        mutex_lock(&rtc->ops_lock);
        /*
         * Remove innards of this RTC, then disable it, before
@@ -312,6 +379,7 @@ static void devm_rtc_device_release(struct device *dev, void *res)
 {
        struct rtc_device *rtc = *(struct rtc_device **)res;
 
+       rtc_nvmem_unregister(rtc);
        rtc_device_unregister(rtc);
 }
 
@@ -382,6 +450,8 @@ static void devm_rtc_release_device(struct device *dev, void *res)
 {
        struct rtc_device *rtc = *(struct rtc_device **)res;
 
+       rtc_nvmem_unregister(rtc);
+
        if (rtc->registered)
                rtc_device_unregister(rtc);
        else
@@ -435,6 +505,7 @@ int __rtc_register_device(struct module *owner, struct rtc_device *rtc)
                return -EINVAL;
 
        rtc->owner = owner;
+       rtc_device_get_offset(rtc);
 
        /* Check to see if there is an ALARM already set in hw */
        err = __rtc_read_alarm(rtc, &alrm);
@@ -453,8 +524,6 @@ int __rtc_register_device(struct module *owner, struct rtc_device *rtc)
 
        rtc_proc_add_device(rtc);
 
-       rtc_nvmem_register(rtc);
-
        rtc->registered = true;
        dev_info(rtc->dev.parent, "registered as %s\n",
                 dev_name(&rtc->dev));
index e1cfa06810ef275704ab887935b39ca438ae5c3a..e79f2a181ad24217a3e3bc232593184b82d494fd 100644 (file)
@@ -49,6 +49,11 @@ static int __init rtc_hctosys(void)
 
        tv64.tv_sec = rtc_tm_to_time64(&tm);
 
+#if BITS_PER_LONG == 32
+       if (tv64.tv_sec > INT_MAX)
+               goto err_read;
+#endif
+
        err = do_settimeofday64(&tv64);
 
        dev_info(rtc->dev.parent,
index 672b192f8153aa00665daaefc541ebaf601cfe74..7cbdc9228dd500554ff40fb770b60abf48df6e4f 100644 (file)
 #include <linux/log2.h>
 #include <linux/workqueue.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/rtc.h>
+
 static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer);
 static void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer);
 
+static void rtc_add_offset(struct rtc_device *rtc, struct rtc_time *tm)
+{
+       time64_t secs;
+
+       if (!rtc->offset_secs)
+               return;
+
+       secs = rtc_tm_to_time64(tm);
+
+       /*
+        * Since the reading time values from RTC device are always in the RTC
+        * original valid range, but we need to skip the overlapped region
+        * between expanded range and original range, which is no need to add
+        * the offset.
+        */
+       if ((rtc->start_secs > rtc->range_min && secs >= rtc->start_secs) ||
+           (rtc->start_secs < rtc->range_min &&
+            secs <= (rtc->start_secs + rtc->range_max - rtc->range_min)))
+               return;
+
+       rtc_time64_to_tm(secs + rtc->offset_secs, tm);
+}
+
+static void rtc_subtract_offset(struct rtc_device *rtc, struct rtc_time *tm)
+{
+       time64_t secs;
+
+       if (!rtc->offset_secs)
+               return;
+
+       secs = rtc_tm_to_time64(tm);
+
+       /*
+        * If the setting time values are in the valid range of RTC hardware
+        * device, then no need to subtract the offset when setting time to RTC
+        * device. Otherwise we need to subtract the offset to make the time
+        * values are valid for RTC hardware device.
+        */
+       if (secs >= rtc->range_min && secs <= rtc->range_max)
+               return;
+
+       rtc_time64_to_tm(secs - rtc->offset_secs, tm);
+}
+
+static int rtc_valid_range(struct rtc_device *rtc, struct rtc_time *tm)
+{
+       if (rtc->range_min != rtc->range_max) {
+               time64_t time = rtc_tm_to_time64(tm);
+               time64_t range_min = rtc->set_start_time ? rtc->start_secs :
+                       rtc->range_min;
+               time64_t range_max = rtc->set_start_time ?
+                       (rtc->start_secs + rtc->range_max - rtc->range_min) :
+                       rtc->range_max;
+
+               if (time < range_min || time > range_max)
+                       return -ERANGE;
+       }
+
+       return 0;
+}
+
 static int __rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
 {
        int err;
@@ -36,6 +100,8 @@ static int __rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
                        return err;
                }
 
+               rtc_add_offset(rtc, tm);
+
                err = rtc_valid_tm(tm);
                if (err < 0)
                        dev_dbg(&rtc->dev, "read_time: rtc_time isn't valid\n");
@@ -53,6 +119,8 @@ int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
 
        err = __rtc_read_time(rtc, tm);
        mutex_unlock(&rtc->ops_lock);
+
+       trace_rtc_read_time(rtc_tm_to_time64(tm), err);
        return err;
 }
 EXPORT_SYMBOL_GPL(rtc_read_time);
@@ -65,6 +133,12 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm)
        if (err != 0)
                return err;
 
+       err = rtc_valid_range(rtc, tm);
+       if (err)
+               return err;
+
+       rtc_subtract_offset(rtc, tm);
+
        err = mutex_lock_interruptible(&rtc->ops_lock);
        if (err)
                return err;
@@ -87,6 +161,8 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm)
        mutex_unlock(&rtc->ops_lock);
        /* A timer might have just expired */
        schedule_work(&rtc->irqwork);
+
+       trace_rtc_set_time(rtc_tm_to_time64(tm), err);
        return err;
 }
 EXPORT_SYMBOL_GPL(rtc_set_time);
@@ -119,6 +195,8 @@ static int rtc_read_alarm_internal(struct rtc_device *rtc, struct rtc_wkalrm *al
        }
 
        mutex_unlock(&rtc->ops_lock);
+
+       trace_rtc_read_alarm(rtc_tm_to_time64(&alarm->time), err);
        return err;
 }
 
@@ -316,6 +394,7 @@ int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
        }
        mutex_unlock(&rtc->ops_lock);
 
+       trace_rtc_read_alarm(rtc_tm_to_time64(&alarm->time), err);
        return err;
 }
 EXPORT_SYMBOL_GPL(rtc_read_alarm);
@@ -329,6 +408,8 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
        err = rtc_valid_tm(&alarm->time);
        if (err)
                return err;
+
+       rtc_subtract_offset(rtc, &alarm->time);
        scheduled = rtc_tm_to_time64(&alarm->time);
 
        /* Make sure we're not setting alarms in the past */
@@ -352,6 +433,7 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
        else
                err = rtc->ops->set_alarm(rtc->dev.parent, alarm);
 
+       trace_rtc_set_alarm(rtc_tm_to_time64(&alarm->time), err);
        return err;
 }
 
@@ -363,6 +445,10 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
        if (err != 0)
                return err;
 
+       err = rtc_valid_range(rtc, &alarm->time);
+       if (err)
+               return err;
+
        err = mutex_lock_interruptible(&rtc->ops_lock);
        if (err)
                return err;
@@ -375,6 +461,8 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
                err = rtc_timer_enqueue(rtc, &rtc->aie_timer);
 
        mutex_unlock(&rtc->ops_lock);
+
+       rtc_add_offset(rtc, &alarm->time);
        return err;
 }
 EXPORT_SYMBOL_GPL(rtc_set_alarm);
@@ -406,6 +494,7 @@ int rtc_initialize_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 
                rtc->aie_timer.enabled = 1;
                timerqueue_add(&rtc->timerqueue, &rtc->aie_timer.node);
+               trace_rtc_timer_enqueue(&rtc->aie_timer);
        }
        mutex_unlock(&rtc->ops_lock);
        return err;
@@ -435,6 +524,8 @@ int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled)
                err = rtc->ops->alarm_irq_enable(rtc->dev.parent, enabled);
 
        mutex_unlock(&rtc->ops_lock);
+
+       trace_rtc_alarm_irq_enable(enabled, err);
        return err;
 }
 EXPORT_SYMBOL_GPL(rtc_alarm_irq_enable);
@@ -709,6 +800,8 @@ retry:
                rtc->pie_enabled = enabled;
        }
        spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
+
+       trace_rtc_irq_set_state(enabled, err);
        return err;
 }
 EXPORT_SYMBOL_GPL(rtc_irq_set_state);
@@ -745,6 +838,8 @@ retry:
                }
        }
        spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
+
+       trace_rtc_irq_set_freq(freq, err);
        return err;
 }
 EXPORT_SYMBOL_GPL(rtc_irq_set_freq);
@@ -779,6 +874,7 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
        }
 
        timerqueue_add(&rtc->timerqueue, &timer->node);
+       trace_rtc_timer_enqueue(timer);
        if (!next || ktime_before(timer->node.expires, next->expires)) {
                struct rtc_wkalrm alarm;
                int err;
@@ -790,6 +886,7 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
                        schedule_work(&rtc->irqwork);
                } else if (err) {
                        timerqueue_del(&rtc->timerqueue, &timer->node);
+                       trace_rtc_timer_dequeue(timer);
                        timer->enabled = 0;
                        return err;
                }
@@ -803,6 +900,7 @@ static void rtc_alarm_disable(struct rtc_device *rtc)
                return;
 
        rtc->ops->alarm_irq_enable(rtc->dev.parent, false);
+       trace_rtc_alarm_irq_enable(0, 0);
 }
 
 /**
@@ -821,6 +919,7 @@ static void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer)
 {
        struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue);
        timerqueue_del(&rtc->timerqueue, &timer->node);
+       trace_rtc_timer_dequeue(timer);
        timer->enabled = 0;
        if (next == &timer->node) {
                struct rtc_wkalrm alarm;
@@ -871,16 +970,19 @@ again:
                /* expire timer */
                timer = container_of(next, struct rtc_timer, node);
                timerqueue_del(&rtc->timerqueue, &timer->node);
+               trace_rtc_timer_dequeue(timer);
                timer->enabled = 0;
                if (timer->task.func)
                        timer->task.func(timer->task.private_data);
 
+               trace_rtc_timer_fired(timer);
                /* Re-add/fwd periodic timers */
                if (ktime_to_ns(timer->period)) {
                        timer->node.expires = ktime_add(timer->node.expires,
                                                        timer->period);
                        timer->enabled = 1;
                        timerqueue_add(&rtc->timerqueue, &timer->node);
+                       trace_rtc_timer_enqueue(timer);
                }
        }
 
@@ -902,6 +1004,7 @@ reprogram:
 
                        timer = container_of(next, struct rtc_timer, node);
                        timerqueue_del(&rtc->timerqueue, &timer->node);
+                       trace_rtc_timer_dequeue(timer);
                        timer->enabled = 0;
                        dev_err(&rtc->dev, "__rtc_set_alarm: err=%d\n", err);
                        goto again;
@@ -992,6 +1095,8 @@ int rtc_read_offset(struct rtc_device *rtc, long *offset)
        mutex_lock(&rtc->ops_lock);
        ret = rtc->ops->read_offset(rtc->dev.parent, offset);
        mutex_unlock(&rtc->ops_lock);
+
+       trace_rtc_read_offset(*offset, ret);
        return ret;
 }
 
@@ -1025,5 +1130,7 @@ int rtc_set_offset(struct rtc_device *rtc, long offset)
        mutex_lock(&rtc->ops_lock);
        ret = rtc->ops->set_offset(rtc->dev.parent, offset);
        mutex_unlock(&rtc->ops_lock);
+
+       trace_rtc_set_offset(offset, ret);
        return ret;
 }
index 8567b4ed9ac62e9c652ed1b008748219080747a5..17ec4c8d0fad7df6371be15dda833bf6d3e332ea 100644 (file)
@@ -14,8 +14,6 @@
 #include <linux/rtc.h>
 #include <linux/sysfs.h>
 
-#include "rtc-core.h"
-
 /*
  * Deprecated ABI compatibility, this should be removed at some point
  */
@@ -46,7 +44,7 @@ rtc_nvram_write(struct file *filp, struct kobject *kobj,
        return nvmem_device_write(rtc->nvmem, off, count, buf);
 }
 
-static int rtc_nvram_register(struct rtc_device *rtc)
+static int rtc_nvram_register(struct rtc_device *rtc, size_t size)
 {
        int err;
 
@@ -64,7 +62,7 @@ static int rtc_nvram_register(struct rtc_device *rtc)
 
        rtc->nvram->read = rtc_nvram_read;
        rtc->nvram->write = rtc_nvram_write;
-       rtc->nvram->size = rtc->nvmem_config->size;
+       rtc->nvram->size = size;
 
        err = sysfs_create_bin_file(&rtc->dev.parent->kobj,
                                    rtc->nvram);
@@ -84,21 +82,28 @@ static void rtc_nvram_unregister(struct rtc_device *rtc)
 /*
  * New ABI, uses nvmem
  */
-void rtc_nvmem_register(struct rtc_device *rtc)
+int rtc_nvmem_register(struct rtc_device *rtc,
+                      struct nvmem_config *nvmem_config)
 {
-       if (!rtc->nvmem_config)
-               return;
+       if (!IS_ERR_OR_NULL(rtc->nvmem))
+               return -EBUSY;
+
+       if (!nvmem_config)
+               return -ENODEV;
 
-       rtc->nvmem_config->dev = &rtc->dev;
-       rtc->nvmem_config->owner = rtc->owner;
-       rtc->nvmem = nvmem_register(rtc->nvmem_config);
+       nvmem_config->dev = rtc->dev.parent;
+       nvmem_config->owner = rtc->owner;
+       rtc->nvmem = nvmem_register(nvmem_config);
        if (IS_ERR_OR_NULL(rtc->nvmem))
-               return;
+               return PTR_ERR(rtc->nvmem);
 
        /* Register the old ABI */
        if (rtc->nvram_old_abi)
-               rtc_nvram_register(rtc);
+               rtc_nvram_register(rtc, nvmem_config->size);
+
+       return 0;
 }
+EXPORT_SYMBOL_GPL(rtc_nvmem_register);
 
 void rtc_nvmem_unregister(struct rtc_device *rtc)
 {
index 466bf7f9a285a5c455a26d199083384d93a4e121..6cbafefa80a2e4cde530895f448daa73969c4f5e 100644 (file)
@@ -134,9 +134,9 @@ static int pm80x_rtc_set_time(struct device *dev, struct rtc_time *tm)
        struct pm80x_rtc_info *info = dev_get_drvdata(dev);
        unsigned char buf[4];
        unsigned long ticks, base, data;
-       if ((tm->tm_year < 70) || (tm->tm_year > 138)) {
+       if (tm->tm_year > 206) {
                dev_dbg(info->dev,
-                       "Set time %d out of range. Please set time between 1970 to 2038.\n",
+                       "Set time %d out of range. Please set time between 1970 to 2106.\n",
                        1900 + tm->tm_year);
                return -EINVAL;
        }
index 19e53b3b8e005a430f2c1b96c8565c2a5ddf4c85..01ffc0ef8033f850b864bbc2b9b499b279426303 100644 (file)
@@ -135,9 +135,9 @@ static int pm860x_rtc_set_time(struct device *dev, struct rtc_time *tm)
        unsigned char buf[4];
        unsigned long ticks, base, data;
 
-       if ((tm->tm_year < 70) || (tm->tm_year > 138)) {
+       if (tm->tm_year > 206) {
                dev_dbg(info->dev, "Set time %d out of range. "
-                       "Please set time between 1970 to 2038.\n",
+                       "Please set time between 1970 to 2106.\n",
                        1900 + tm->tm_year);
                return -EINVAL;
        }
index ef5c16dfabfa489a0464dcf12dd519feb7a4216d..8dc45193244614b1f58fad4ad608bbf9271f6e67 100644 (file)
@@ -217,7 +217,7 @@ static int _abb5zes3_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
        struct abb5zes3_rtc_data *data = dev_get_drvdata(dev);
        u8 regs[ABB5ZES3_REG_RTC_SC + ABB5ZES3_RTC_SEC_LEN];
-       int ret;
+       int ret = 0;
 
        /*
         * As we need to read CTRL1 register anyway to access 24/12h
@@ -255,8 +255,6 @@ static int _abb5zes3_rtc_read_time(struct device *dev, struct rtc_time *tm)
        tm->tm_mon  = bcd2bin(regs[ABB5ZES3_REG_RTC_MO]) - 1; /* starts at 1 */
        tm->tm_year = bcd2bin(regs[ABB5ZES3_REG_RTC_YR]) + 100;
 
-       ret = rtc_valid_tm(tm);
-
 err:
        return ret;
 }
index 9b725c55305859b48f5e6f2ffe45088d4b6f0659..821ff52a2222e03f02c7ecc34e8d1bc6de5d18b0 100644 (file)
@@ -106,7 +106,7 @@ static int ab3100_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
        rtc_time64_to_tm(time, tm);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int ab3100_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
index 24a0af650a1bcf6b0c514a3cd186fb5e079ef278..e28f4401fd35a8fe3b4964254d824aaf06e30749 100644 (file)
 #define AB8500_RTC_FORCE_BKUP_REG      0x0D
 #define AB8500_RTC_CALIB_REG           0x0E
 #define AB8500_RTC_SWITCH_STAT_REG     0x0F
-#define AB8540_RTC_ALRM_SEC            0x22
-#define AB8540_RTC_ALRM_MIN_LOW_REG    0x23
-#define AB8540_RTC_ALRM_MIN_MID_REG    0x24
-#define AB8540_RTC_ALRM_MIN_HI_REG     0x25
 
 /* RtcReadRequest bits */
 #define RTC_READ_REQUEST               0x01
@@ -63,11 +59,6 @@ static const u8 ab8500_rtc_alarm_regs[] = {
        AB8500_RTC_ALRM_MIN_LOW_REG
 };
 
-static const u8 ab8540_rtc_alarm_regs[] = {
-       AB8540_RTC_ALRM_MIN_HI_REG, AB8540_RTC_ALRM_MIN_MID_REG,
-       AB8540_RTC_ALRM_MIN_LOW_REG, AB8540_RTC_ALRM_SEC
-};
-
 /* Calculate the seconds from 1970 to 01-01-2000 00:00:00 */
 static unsigned long get_elapsed_seconds(int year)
 {
@@ -131,7 +122,7 @@ static int ab8500_rtc_read_time(struct device *dev, struct rtc_time *tm)
        secs += get_elapsed_seconds(AB8500_RTC_EPOCH);
 
        rtc_time_to_tm(secs, tm);
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int ab8500_rtc_set_time(struct device *dev, struct rtc_time *tm)
@@ -277,43 +268,6 @@ static int ab8500_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
        return ab8500_rtc_irq_enable(dev, alarm->enabled);
 }
 
-static int ab8540_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
-{
-       int retval, i;
-       unsigned char buf[ARRAY_SIZE(ab8540_rtc_alarm_regs)];
-       unsigned long mins, secs = 0;
-
-       if (alarm->time.tm_year < (AB8500_RTC_EPOCH - 1900)) {
-               dev_dbg(dev, "year should be equal to or greater than %d\n",
-                               AB8500_RTC_EPOCH);
-               return -EINVAL;
-       }
-
-       /* Get the number of seconds since 1970 */
-       rtc_tm_to_time(&alarm->time, &secs);
-
-       /*
-        * Convert it to the number of seconds since 01-01-2000 00:00:00
-        */
-       secs -= get_elapsed_seconds(AB8500_RTC_EPOCH);
-       mins = secs / 60;
-
-       buf[3] = secs % 60;
-       buf[2] = mins & 0xFF;
-       buf[1] = (mins >> 8) & 0xFF;
-       buf[0] = (mins >> 16) & 0xFF;
-
-       /* Set the alarm time */
-       for (i = 0; i < ARRAY_SIZE(ab8540_rtc_alarm_regs); i++) {
-               retval = abx500_set_register_interruptible(dev, AB8500_RTC,
-                       ab8540_rtc_alarm_regs[i], buf[i]);
-               if (retval < 0)
-                       return retval;
-       }
-
-       return ab8500_rtc_irq_enable(dev, alarm->enabled);
-}
-
 static int ab8500_rtc_set_calibration(struct device *dev, int calibration)
 {
        int retval;
@@ -435,17 +389,8 @@ static const struct rtc_class_ops ab8500_rtc_ops = {
        .alarm_irq_enable       = ab8500_rtc_irq_enable,
 };
 
-static const struct rtc_class_ops ab8540_rtc_ops = {
-       .read_time              = ab8500_rtc_read_time,
-       .set_time               = ab8500_rtc_set_time,
-       .read_alarm             = ab8500_rtc_read_alarm,
-       .set_alarm              = ab8540_rtc_set_alarm,
-       .alarm_irq_enable       = ab8500_rtc_irq_enable,
-};
-
 static const struct platform_device_id ab85xx_rtc_ids[] = {
        { "ab8500-rtc", (kernel_ulong_t)&ab8500_rtc_ops, },
-       { "ab8540-rtc", (kernel_ulong_t)&ab8540_rtc_ops, },
        { /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(platform, ab85xx_rtc_ids);
index b033bc556f5d29b4d803ff6506685c3dc940aee1..2cefa67a1132d171404a13b365f0fcffa19aac37 100644 (file)
@@ -172,11 +172,7 @@ static int abx80x_rtc_read_time(struct device *dev, struct rtc_time *tm)
        tm->tm_mon = bcd2bin(buf[ABX8XX_REG_MO] & 0x1F) - 1;
        tm->tm_year = bcd2bin(buf[ABX8XX_REG_YR]) + 100;
 
-       err = rtc_valid_tm(tm);
-       if (err < 0)
-               dev_err(&client->dev, "retrieved date/time is not valid.\n");
-
-       return err;
+       return 0;
 }
 
 static int abx80x_rtc_set_time(struct device *dev, struct rtc_time *tm)
index 8ff9dc3fe5bf06ea15a2321df4a4383d32414527..3fe576fdd45e0782a1ecead2931ddf95d76d8a8d 100644 (file)
@@ -183,7 +183,29 @@ static int ac100_clkout_determine_rate(struct clk_hw *hw,
 
        for (i = 0; i < num_parents; i++) {
                struct clk_hw *parent = clk_hw_get_parent_by_index(hw, i);
-               unsigned long tmp, prate = clk_hw_get_rate(parent);
+               unsigned long tmp, prate;
+
+               /*
+                * The clock has two parents, one is a fixed clock which is
+                * internally registered by the ac100 driver. The other parent
+                * is a clock from the codec side of the chip, which we
+                * properly declare and reference in the devicetree and is
+                * not implemented in any driver right now.
+                * If the clock core looks for the parent of that second
+                * missing clock, it can't find one that is registered and
+                * returns NULL.
+                * So we end up in a situation where clk_hw_get_num_parents
+                * returns the amount of clocks we can be parented to, but
+                * clk_hw_get_parent_by_index will not return the orphan
+                * clocks.
+                * Thus we need to check if the parent exists before
+                * we get the parent rate, so we could use the RTC
+                * without waiting for the codec to be supported.
+                */
+               if (!parent)
+                       continue;
+
+               prate = clk_hw_get_rate(parent);
 
                tmp = ac100_clkout_round_rate(hw, req->rate, prate);
 
@@ -387,7 +409,7 @@ static int ac100_rtc_get_time(struct device *dev, struct rtc_time *rtc_tm)
        rtc_tm->tm_year = bcd2bin(reg[6] & AC100_RTC_YEA_MASK) +
                          AC100_YEAR_OFF;
 
-       return rtc_valid_tm(rtc_tm);
+       return 0;
 }
 
 static int ac100_rtc_set_time(struct device *dev, struct rtc_time *rtc_tm)
index 7418a763ce5202b37f96b0cf26863400aa953982..ee71e647fd4338fe307b84b2c56701f6b64b5478 100644 (file)
@@ -349,6 +349,7 @@ static const struct rtc_class_ops at91_rtc_ops = {
 };
 
 static const struct regmap_config gpbr_regmap_config = {
+       .name = "gpbr",
        .reg_bits = 32,
        .val_bits = 32,
        .reg_stride = 4,
index 2ba44ccb9c3a3fe00e3e9bf2c740845001011b79..7c5530c71285bd05e429ca51c820bd21a54053c4 100644 (file)
@@ -36,7 +36,7 @@ static int au1xtoy_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
        rtc_time_to_tm(t, tm);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int au1xtoy_rtc_set_time(struct device *dev, struct rtc_time *tm)
index 98ac8d5c7901a59f438cb2ec83670caa54ce80fe..ef52741000a86e694c2c0ab7e8d25d4f5edc80ed 100644 (file)
 #define BQ32K_CFG2             0x09    /* Trickle charger control */
 #define BQ32K_TCFE             BIT(6)  /* Trickle charge FET bypass */
 
+#define MAX_LEN                        10      /* Maximum number of consecutive
+                                        * register for this particular RTC.
+                                        */
+
 struct bq32k_regs {
        uint8_t         seconds;
        uint8_t         minutes;
@@ -74,7 +78,7 @@ static int bq32k_read(struct device *dev, void *data, uint8_t off, uint8_t len)
 static int bq32k_write(struct device *dev, void *data, uint8_t off, uint8_t len)
 {
        struct i2c_client *client = to_i2c_client(dev);
-       uint8_t buffer[len + 1];
+       uint8_t buffer[MAX_LEN + 1];
 
        buffer[0] = off;
        memcpy(&buffer[1], data, len);
@@ -110,7 +114,7 @@ static int bq32k_rtc_read_time(struct device *dev, struct rtc_time *tm)
        tm->tm_year = bcd2bin(regs.years) +
                                ((regs.cent_hours & BQ32K_CENT) ? 100 : 0);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int bq32k_rtc_set_time(struct device *dev, struct rtc_time *tm)
index 6cee61201c30cb4a15eef3fbd72a7df57313e04b..bdd6674a1054858eefcfea54d445d68d9d1ebec6 100644 (file)
@@ -60,6 +60,9 @@ static void brcmstb_waketmr_set_alarm(struct brcmstb_waketmr *timer,
 {
        brcmstb_waketmr_clear_alarm(timer);
 
+       /* Make sure we are actually counting in seconds */
+       writel_relaxed(timer->rate, timer->base + BRCMSTB_WKTMR_PRESCALER);
+
        writel_relaxed(secs + 1, timer->base + BRCMSTB_WKTMR_ALARM);
 }
 
index f7c0f72abb56f98f942cf3e65ac4401305371358..1b3738a11702465f4728a08d14d07e4902970b2e 100644 (file)
@@ -541,11 +541,10 @@ static const struct rtc_class_ops cmos_rtc_ops = {
 
 #define NVRAM_OFFSET   (RTC_REG_D + 1)
 
-static ssize_t
-cmos_nvram_read(struct file *filp, struct kobject *kobj,
-               struct bin_attribute *attr,
-               char *buf, loff_t off, size_t count)
+static int cmos_nvram_read(void *priv, unsigned int off, void *val,
+                          size_t count)
 {
+       unsigned char *buf = val;
        int     retval;
 
        off += NVRAM_OFFSET;
@@ -563,16 +562,13 @@ cmos_nvram_read(struct file *filp, struct kobject *kobj,
        return retval;
 }
 
-static ssize_t
-cmos_nvram_write(struct file *filp, struct kobject *kobj,
-               struct bin_attribute *attr,
-               char *buf, loff_t off, size_t count)
+static int cmos_nvram_write(void *priv, unsigned int off, void *val,
+                           size_t count)
 {
-       struct cmos_rtc *cmos;
+       struct cmos_rtc *cmos = priv;
+       unsigned char   *buf = val;
        int             retval;
 
-       cmos = dev_get_drvdata(container_of(kobj, struct device, kobj));
-
        /* NOTE:  on at least PCs and Ataris, the boot firmware uses a
         * checksum on part of the NVRAM data.  That's currently ignored
         * here.  If userspace is smart enough to know what fields of
@@ -598,17 +594,6 @@ cmos_nvram_write(struct file *filp, struct kobject *kobj,
        return retval;
 }
 
-static struct bin_attribute nvram = {
-       .attr = {
-               .name   = "nvram",
-               .mode   = S_IRUGO | S_IWUSR,
-       },
-
-       .read   = cmos_nvram_read,
-       .write  = cmos_nvram_write,
-       /* size gets set up later */
-};
-
 /*----------------------------------------------------------------*/
 
 static struct cmos_rtc cmos_rtc;
@@ -675,6 +660,14 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
        unsigned char                   rtc_control;
        unsigned                        address_space;
        u32                             flags = 0;
+       struct nvmem_config nvmem_cfg = {
+               .name = "cmos_nvram",
+               .word_size = 1,
+               .stride = 1,
+               .reg_read = cmos_nvram_read,
+               .reg_write = cmos_nvram_write,
+               .priv = &cmos_rtc,
+       };
 
        /* there can be only one ... */
        if (cmos_rtc.dev)
@@ -751,8 +744,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
        cmos_rtc.dev = dev;
        dev_set_drvdata(dev, &cmos_rtc);
 
-       cmos_rtc.rtc = rtc_device_register(driver_name, dev,
-                               &cmos_rtc_ops, THIS_MODULE);
+       cmos_rtc.rtc = devm_rtc_allocate_device(dev);
        if (IS_ERR(cmos_rtc.rtc)) {
                retval = PTR_ERR(cmos_rtc.rtc);
                goto cleanup0;
@@ -814,22 +806,25 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
                }
        }
 
-       /* export at least the first block of NVRAM */
-       nvram.size = address_space - NVRAM_OFFSET;
-       retval = sysfs_create_bin_file(&dev->kobj, &nvram);
-       if (retval < 0) {
-               dev_dbg(dev, "can't create nvram file? %d\n", retval);
+       cmos_rtc.rtc->ops = &cmos_rtc_ops;
+       cmos_rtc.rtc->nvram_old_abi = true;
+       retval = rtc_register_device(cmos_rtc.rtc);
+       if (retval)
                goto cleanup2;
-       }
 
-       dev_info(dev, "%s%s, %zd bytes nvram%s\n",
-               !is_valid_irq(rtc_irq) ? "no alarms" :
-                       cmos_rtc.mon_alrm ? "alarms up to one year" :
-                       cmos_rtc.day_alrm ? "alarms up to one month" :
-                       "alarms up to one day",
-               cmos_rtc.century ? ", y3k" : "",
-               nvram.size,
-               is_hpet_enabled() ? ", hpet irqs" : "");
+       /* export at least the first block of NVRAM */
+       nvmem_cfg.size = address_space - NVRAM_OFFSET;
+       if (rtc_nvmem_register(cmos_rtc.rtc, &nvmem_cfg))
+               dev_err(dev, "nvmem registration failed\n");
+
+       dev_info(dev, "%s%s, %d bytes nvram%s\n",
+                !is_valid_irq(rtc_irq) ? "no alarms" :
+                cmos_rtc.mon_alrm ? "alarms up to one year" :
+                cmos_rtc.day_alrm ? "alarms up to one month" :
+                "alarms up to one day",
+                cmos_rtc.century ? ", y3k" : "",
+                nvmem_cfg.size,
+                is_hpet_enabled() ? ", hpet irqs" : "");
 
        return 0;
 
@@ -838,7 +833,6 @@ cleanup2:
                free_irq(rtc_irq, cmos_rtc.rtc);
 cleanup1:
        cmos_rtc.dev = NULL;
-       rtc_device_unregister(cmos_rtc.rtc);
 cleanup0:
        if (RTC_IOMAPPED)
                release_region(ports->start, resource_size(ports));
@@ -862,14 +856,11 @@ static void cmos_do_remove(struct device *dev)
 
        cmos_do_shutdown(cmos->irq);
 
-       sysfs_remove_bin_file(&dev->kobj, &nvram);
-
        if (is_valid_irq(cmos->irq)) {
                free_irq(cmos->irq, cmos->rtc);
                hpet_unregister_irq_handler(cmos_interrupt);
        }
 
-       rtc_device_unregister(cmos->rtc);
        cmos->rtc = NULL;
 
        ports = cmos->iomem;
@@ -1271,8 +1262,6 @@ MODULE_DEVICE_TABLE(of, of_cmos_match);
 static __init void cmos_of_init(struct platform_device *pdev)
 {
        struct device_node *node = pdev->dev.of_node;
-       struct rtc_time time;
-       int ret;
        const __be32 *val;
 
        if (!node)
@@ -1285,16 +1274,6 @@ static __init void cmos_of_init(struct platform_device *pdev)
        val = of_get_property(node, "freq-reg", NULL);
        if (val)
                CMOS_WRITE(be32_to_cpup(val), RTC_FREQ_SELECT);
-
-       cmos_read_time(&pdev->dev, &time);
-       ret = rtc_valid_tm(&time);
-       if (ret) {
-               struct rtc_time def_time = {
-                       .tm_year = 1,
-                       .tm_mday = 1,
-               };
-               cmos_set_time(&pdev->dev, &def_time);
-       }
 }
 #else
 static inline void cmos_of_init(struct platform_device *pdev) {}
index cfc4141d99cde18def976813feba11113111a378..2fc517498a5d85688bce3b28268670508ea176f6 100644 (file)
@@ -82,7 +82,7 @@ static int coh901331_read_time(struct device *dev, struct rtc_time *tm)
        if (readl(rtap->virtbase + COH901331_VALID)) {
                rtc_time_to_tm(readl(rtap->virtbase + COH901331_CUR_TIME), tm);
                clk_disable(rtap->clk);
-               return rtc_valid_tm(tm);
+               return 0;
        }
        clk_disable(rtap->clk);
        return -EINVAL;
index 513b9bedd2c823f617c769ab1581d9aa33649702..0abf98983e13f6353e75e7d732d78a361b189dd7 100644 (file)
@@ -46,11 +46,3 @@ static inline const struct attribute_group **rtc_get_dev_attribute_groups(void)
        return NULL;
 }
 #endif
-
-#ifdef CONFIG_RTC_NVMEM
-void rtc_nvmem_register(struct rtc_device *rtc);
-void rtc_nvmem_unregister(struct rtc_device *rtc);
-#else
-static inline void rtc_nvmem_register(struct rtc_device *rtc) {}
-static inline void rtc_nvmem_unregister(struct rtc_device *rtc) {}
-#endif
index 3a0333e1f21a490a35d64c4e225d7536c7a758b8..a8856f2b9bc22a0409ad2ed95144335113e70894 100644 (file)
@@ -119,7 +119,7 @@ static int cpcap_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
        cpcap2rtc_time(tm, &cpcap_tm);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int cpcap_rtc_set_time(struct device *dev, struct rtc_time *tm)
index f0ea6899c7319997772ae275c0adcfdcf4f54f50..bf7ced095c94c7726c0ecebe90d4d9f15ba23c7c 100644 (file)
@@ -197,10 +197,10 @@ static int cros_ec_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
                cros_ec_rtc->saved_alarm = (u32)alarm_time;
        } else {
                /* Don't set an alarm in the past. */
-               if ((u32)alarm_time < current_time)
-                       alarm_offset = EC_RTC_ALARM_CLEAR;
-               else
-                       alarm_offset = (u32)alarm_time - current_time;
+               if ((u32)alarm_time <= current_time)
+                       return -ETIME;
+
+               alarm_offset = (u32)alarm_time - current_time;
        }
 
        ret = cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM, alarm_offset);
index 4273377562ec64dcdf640506fe4b4dd83b413de0..03044e1bc4974ff7e28957413dc4a8d99a0e95ef 100644 (file)
@@ -187,8 +187,7 @@ static int da9052_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm)
                        rtc_tm->tm_min  = v[0][1] & DA9052_RTC_MIN;
                        rtc_tm->tm_sec  = v[0][0] & DA9052_RTC_SEC;
 
-                       ret = rtc_valid_tm(rtc_tm);
-                       return ret;
+                       return 0;
                }
 
                idx = (1-idx);
index 678af8648c45316f22ec7e4d124d1758436022d6..e08cd8130c23013b3d7ab2586b638bc5434c0dce 100644 (file)
@@ -158,7 +158,7 @@ static int da9055_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm)
        rtc_tm->tm_min  = v[1] & DA9055_RTC_MIN;
        rtc_tm->tm_sec  = v[0] & DA9055_RTC_SEC;
 
-       return rtc_valid_tm(rtc_tm);
+       return 0;
 }
 
 static int da9055_rtc_set_time(struct device *dev, struct rtc_time *tm)
index f85cae240f123eafde246ba1c084c8b6943d171e..b4e054c64bad9e54d23adb3a47da1008224906b3 100644 (file)
@@ -256,7 +256,7 @@ static int da9063_rtc_read_time(struct device *dev, struct rtc_time *tm)
        else
                rtc->rtc_sync = false;
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int da9063_rtc_set_time(struct device *dev, struct rtc_time *tm)
index 9c82b1da2d4593a592f2ddff286556bd6da15ef5..5f158715fb4c96e16d62d7f8eee4e200fdb267f7 100644 (file)
@@ -99,7 +99,7 @@ static int ds1216_rtc_read_time(struct device *dev, struct rtc_time *tm)
        if (tm->tm_year < 70)
                tm->tm_year += 100;
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int ds1216_rtc_set_time(struct device *dev, struct rtc_time *tm)
index ef75c349dff9cf3da5a5c2c692dc4e61b3fe79bf..0744916b79c505836abac02ad556bf5a5a39cf05 100644 (file)
@@ -211,7 +211,7 @@ static int ds1286_read_time(struct device *dev, struct rtc_time *tm)
 
        tm->tm_mon--;
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int ds1286_set_time(struct device *dev, struct rtc_time *tm)
index 0ec4be62322bfaf7695eb99aa6efd428264a3771..2a881150d51c282802fc168b992448f37b0af66e 100644 (file)
@@ -43,7 +43,7 @@ static int ds1302_rtc_set_time(struct device *dev, struct rtc_time *time)
 {
        struct spi_device       *spi = dev_get_drvdata(dev);
        u8              buf[1 + RTC_CLCK_LEN];
-       u8              *bp = buf;
+       u8              *bp;
        int             status;
 
        /* Enable writing */
@@ -98,8 +98,7 @@ static int ds1302_rtc_get_time(struct device *dev, struct rtc_time *time)
        time->tm_mon = bcd2bin(buf[RTC_ADDR_MON]) - 1;
        time->tm_year = bcd2bin(buf[RTC_ADDR_YEAR]) + 100;
 
-       /* Time may not be set */
-       return rtc_valid_tm(time);
+       return 0;
 }
 
 static const struct rtc_class_ops ds1302_rtc_ops = {
@@ -112,7 +111,7 @@ static int ds1302_probe(struct spi_device *spi)
        struct rtc_device       *rtc;
        u8              addr;
        u8              buf[4];
-       u8              *bp = buf;
+       u8              *bp;
        int             status;
 
        /* Sanity check board setup data.  This may be hooked up
index d8df2e9e14adb94e45a01655ee773cb76f10e28a..2d502fc85698e6203db082327b0cfac806d208be 100644 (file)
@@ -203,8 +203,7 @@ static int ds1305_get_time(struct device *dev, struct rtc_time *time)
                time->tm_hour, time->tm_mday,
                time->tm_mon, time->tm_year, time->tm_wday);
 
-       /* Time may not be set */
-       return rtc_valid_tm(time);
+       return 0;
 }
 
 static int ds1305_set_time(struct device *dev, struct rtc_time *time)
@@ -544,15 +543,6 @@ static int ds1305_nvram_write(void *priv, unsigned int off, void *buf,
        return spi_sync(spi, &m);
 }
 
-static struct nvmem_config ds1305_nvmem_cfg = {
-       .name = "ds1305_nvram",
-       .word_size = 1,
-       .stride = 1,
-       .size = DS1305_NVRAM_LEN,
-       .reg_read = ds1305_nvram_read,
-       .reg_write = ds1305_nvram_write,
-};
-
 /*----------------------------------------------------------------------*/
 
 /*
@@ -566,6 +556,14 @@ static int ds1305_probe(struct spi_device *spi)
        u8                              addr, value;
        struct ds1305_platform_data     *pdata = dev_get_platdata(&spi->dev);
        bool                            write_ctrl = false;
+       struct nvmem_config ds1305_nvmem_cfg = {
+               .name = "ds1305_nvram",
+               .word_size = 1,
+               .stride = 1,
+               .size = DS1305_NVRAM_LEN,
+               .reg_read = ds1305_nvram_read,
+               .reg_write = ds1305_nvram_write,
+       };
 
        /* Sanity check board setup data.  This may be hooked up
         * in 3wire mode, but we don't care.  Note that unless
@@ -703,15 +701,15 @@ static int ds1305_probe(struct spi_device *spi)
        ds1305->rtc->ops = &ds1305_ops;
 
        ds1305_nvmem_cfg.priv = ds1305;
-       ds1305->rtc->nvmem_config = &ds1305_nvmem_cfg;
        ds1305->rtc->nvram_old_abi = true;
-
        status = rtc_register_device(ds1305->rtc);
        if (status) {
                dev_dbg(&spi->dev, "register rtc --> %d\n", status);
                return status;
        }
 
+       rtc_nvmem_register(ds1305->rtc, &ds1305_nvmem_cfg);
+
        /* Maybe set up alarm IRQ; be ready to handle it triggering right
         * away.  NOTE that we don't share this.  The signal is active low,
         * and we can't ack it before a SPI message delay.  We temporarily
index 923dde912f604094219c5860794080888308df1f..a13e59edff530d147a44574f06b351e72d3621f5 100644 (file)
@@ -114,7 +114,6 @@ enum ds_type {
 #      define RX8025_BIT_XST           0x20
 
 struct ds1307 {
-       struct nvmem_config     nvmem_cfg;
        enum ds_type            type;
        unsigned long           flags;
 #define HAS_NVRAM      0               /* bit 0 == sysfs file active */
@@ -438,8 +437,7 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
                t->tm_hour, t->tm_mday,
                t->tm_mon, t->tm_year, t->tm_wday);
 
-       /* initial clock setting can be undefined */
-       return rtc_valid_tm(t);
+       return 0;
 }
 
 static int ds1307_set_time(struct device *dev, struct rtc_time *t)
@@ -1696,24 +1694,26 @@ read_rtc:
                }
        }
 
-       if (chip->nvram_size) {
-               ds1307->nvmem_cfg.name = "ds1307_nvram";
-               ds1307->nvmem_cfg.word_size = 1;
-               ds1307->nvmem_cfg.stride = 1;
-               ds1307->nvmem_cfg.size = chip->nvram_size;
-               ds1307->nvmem_cfg.reg_read = ds1307_nvram_read;
-               ds1307->nvmem_cfg.reg_write = ds1307_nvram_write;
-               ds1307->nvmem_cfg.priv = ds1307;
-
-               ds1307->rtc->nvmem_config = &ds1307->nvmem_cfg;
-               ds1307->rtc->nvram_old_abi = true;
-       }
-
        ds1307->rtc->ops = chip->rtc_ops ?: &ds13xx_rtc_ops;
        err = rtc_register_device(ds1307->rtc);
        if (err)
                return err;
 
+       if (chip->nvram_size) {
+               struct nvmem_config nvmem_cfg = {
+                       .name = "ds1307_nvram",
+                       .word_size = 1,
+                       .stride = 1,
+                       .size = chip->nvram_size,
+                       .reg_read = ds1307_nvram_read,
+                       .reg_write = ds1307_nvram_write,
+                       .priv = ds1307,
+               };
+
+               ds1307->rtc->nvram_old_abi = true;
+               rtc_nvmem_register(ds1307->rtc, &nvmem_cfg);
+       }
+
        ds1307_hwmon_register(ds1307);
        ds1307_clks_register(ds1307);
 
index 895fbeeb47fe1bc78fc79350e6e58b23c50ab64e..5208da4cf94ab2b0e69d374836acb63e83916ca6 100644 (file)
@@ -153,120 +153,22 @@ static ssize_t ds1343_store_glitchfilter(struct device *dev,
 static DEVICE_ATTR(glitch_filter, S_IRUGO | S_IWUSR, ds1343_show_glitchfilter,
                        ds1343_store_glitchfilter);
 
-static ssize_t ds1343_nvram_write(struct file *filp, struct kobject *kobj,
-                       struct bin_attribute *attr,
-                       char *buf, loff_t off, size_t count)
+static int ds1343_nvram_write(void *priv, unsigned int off, void *val,
+                             size_t bytes)
 {
-       int ret;
-       unsigned char address;
-       struct device *dev = kobj_to_dev(kobj);
-       struct ds1343_priv *priv = dev_get_drvdata(dev);
-
-       address = DS1343_NVRAM + off;
-
-       ret = regmap_bulk_write(priv->map, address, buf, count);
-       if (ret < 0)
-               dev_err(&priv->spi->dev, "Error in nvram write %d", ret);
+       struct ds1343_priv *ds1343 = priv;
 
-       return (ret < 0) ? ret : count;
+       return regmap_bulk_write(ds1343->map, DS1343_NVRAM + off, val, bytes);
 }
 
-
-static ssize_t ds1343_nvram_read(struct file *filp, struct kobject *kobj,
-                               struct bin_attribute *attr,
-                               char *buf, loff_t off, size_t count)
+static int ds1343_nvram_read(void *priv, unsigned int off, void *val,
+                            size_t bytes)
 {
-       int ret;
-       unsigned char address;
-       struct device *dev = kobj_to_dev(kobj);
-       struct ds1343_priv *priv = dev_get_drvdata(dev);
+       struct ds1343_priv *ds1343 = priv;
 
-       address = DS1343_NVRAM + off;
-
-       ret = regmap_bulk_read(priv->map, address, buf, count);
-       if (ret < 0)
-               dev_err(&priv->spi->dev, "Error in nvram read %d\n", ret);
-
-       return (ret < 0) ? ret : count;
+       return regmap_bulk_read(ds1343->map, DS1343_NVRAM + off, val, bytes);
 }
 
-
-static struct bin_attribute nvram_attr = {
-       .attr.name      = "nvram",
-       .attr.mode      = S_IRUGO | S_IWUSR,
-       .read           = ds1343_nvram_read,
-       .write          = ds1343_nvram_write,
-       .size           = DS1343_NVRAM_LEN,
-};
-
-static ssize_t ds1343_show_alarmstatus(struct device *dev,
-                               struct device_attribute *attr, char *buf)
-{
-       struct ds1343_priv *priv = dev_get_drvdata(dev);
-       int alarmstatus, data;
-
-       regmap_read(priv->map, DS1343_CONTROL_REG, &data);
-
-       alarmstatus = !!(data & DS1343_A0IE);
-
-       if (alarmstatus)
-               return sprintf(buf, "enabled\n");
-       else
-               return sprintf(buf, "disabled\n");
-}
-
-static DEVICE_ATTR(alarm_status, S_IRUGO, ds1343_show_alarmstatus, NULL);
-
-static ssize_t ds1343_show_alarmmode(struct device *dev,
-                               struct device_attribute *attr, char *buf)
-{
-       struct ds1343_priv *priv = dev_get_drvdata(dev);
-       int alarm_mode, data;
-       char *alarm_str;
-
-       regmap_read(priv->map, DS1343_ALM0_SEC_REG, &data);
-       alarm_mode = (data & 0x80) >> 4;
-
-       regmap_read(priv->map, DS1343_ALM0_MIN_REG, &data);
-       alarm_mode |= (data & 0x80) >> 5;
-
-       regmap_read(priv->map, DS1343_ALM0_HOUR_REG, &data);
-       alarm_mode |= (data & 0x80) >> 6;
-
-       regmap_read(priv->map, DS1343_ALM0_DAY_REG, &data);
-       alarm_mode |= (data & 0x80) >> 7;
-
-       switch (alarm_mode) {
-       case 15:
-               alarm_str = "each second";
-               break;
-
-       case 7:
-               alarm_str = "seconds match";
-               break;
-
-       case 3:
-               alarm_str = "minutes and seconds match";
-               break;
-
-       case 1:
-               alarm_str = "hours, minutes and seconds match";
-               break;
-
-       case 0:
-               alarm_str = "day, hours, minutes and seconds match";
-               break;
-
-       default:
-               alarm_str = "invalid";
-               break;
-       }
-
-       return sprintf(buf, "%s\n", alarm_str);
-}
-
-static DEVICE_ATTR(alarm_mode, S_IRUGO, ds1343_show_alarmmode, NULL);
-
 static ssize_t ds1343_show_tricklecharger(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
@@ -313,7 +215,6 @@ static DEVICE_ATTR(trickle_charger, S_IRUGO, ds1343_show_tricklecharger, NULL);
 
 static int ds1343_sysfs_register(struct device *dev)
 {
-       struct ds1343_priv *priv = dev_get_drvdata(dev);
        int err;
 
        err = device_create_file(dev, &dev_attr_glitch_filter);
@@ -321,33 +222,9 @@ static int ds1343_sysfs_register(struct device *dev)
                return err;
 
        err = device_create_file(dev, &dev_attr_trickle_charger);
-       if (err)
-               goto error1;
-
-       err = device_create_bin_file(dev, &nvram_attr);
-       if (err)
-               goto error2;
-
-       if (priv->irq <= 0)
-               return err;
-
-       err = device_create_file(dev, &dev_attr_alarm_mode);
-       if (err)
-               goto error3;
-
-       err = device_create_file(dev, &dev_attr_alarm_status);
        if (!err)
-               return err;
+               return 0;
 
-       device_remove_file(dev, &dev_attr_alarm_mode);
-
-error3:
-       device_remove_bin_file(dev, &nvram_attr);
-
-error2:
-       device_remove_file(dev, &dev_attr_trickle_charger);
-
-error1:
        device_remove_file(dev, &dev_attr_glitch_filter);
 
        return err;
@@ -355,17 +232,8 @@ error1:
 
 static void ds1343_sysfs_unregister(struct device *dev)
 {
-       struct ds1343_priv *priv = dev_get_drvdata(dev);
-
        device_remove_file(dev, &dev_attr_glitch_filter);
        device_remove_file(dev, &dev_attr_trickle_charger);
-       device_remove_bin_file(dev, &nvram_attr);
-
-       if (priv->irq <= 0)
-               return;
-
-       device_remove_file(dev, &dev_attr_alarm_status);
-       device_remove_file(dev, &dev_attr_alarm_mode);
 }
 
 static int ds1343_read_time(struct device *dev, struct rtc_time *dt)
@@ -386,7 +254,7 @@ static int ds1343_read_time(struct device *dev, struct rtc_time *dt)
        dt->tm_mon      = bcd2bin(buf[5] & 0x1F) - 1;
        dt->tm_year     = bcd2bin(buf[6]) + 100; /* year offset from 1900 */
 
-       return rtc_valid_tm(dt);
+       return 0;
 }
 
 static int ds1343_set_time(struct device *dev, struct rtc_time *dt)
@@ -599,14 +467,18 @@ static const struct rtc_class_ops ds1343_rtc_ops = {
 static int ds1343_probe(struct spi_device *spi)
 {
        struct ds1343_priv *priv;
-       struct regmap_config config;
+       struct regmap_config config = { .reg_bits = 8, .val_bits = 8,
+                                       .write_flag_mask = 0x80, };
        unsigned int data;
        int res;
-
-       memset(&config, 0, sizeof(config));
-       config.reg_bits = 8;
-       config.val_bits = 8;
-       config.write_flag_mask = 0x80;
+       struct nvmem_config nvmem_cfg = {
+               .name = "ds1343-",
+               .word_size = 1,
+               .stride = 1,
+               .size = DS1343_NVRAM_LEN,
+               .reg_read = ds1343_nvram_read,
+               .reg_write = ds1343_nvram_write,
+       };
 
        priv = devm_kzalloc(&spi->dev, sizeof(struct ds1343_priv), GFP_KERNEL);
        if (!priv)
@@ -646,12 +518,19 @@ static int ds1343_probe(struct spi_device *spi)
        data &= ~(DS1343_OSF | DS1343_IRQF1 | DS1343_IRQF0);
        regmap_write(priv->map, DS1343_STATUS_REG, data);
 
-       priv->rtc = devm_rtc_device_register(&spi->dev, "ds1343",
-                                       &ds1343_rtc_ops, THIS_MODULE);
-       if (IS_ERR(priv->rtc)) {
-               dev_err(&spi->dev, "unable to register rtc ds1343\n");
+       priv->rtc = devm_rtc_allocate_device(&spi->dev);
+       if (IS_ERR(priv->rtc))
                return PTR_ERR(priv->rtc);
-       }
+
+       priv->rtc->nvram_old_abi = true;
+       priv->rtc->ops = &ds1343_rtc_ops;
+
+       res = rtc_register_device(priv->rtc);
+       if (res)
+               return res;
+
+       nvmem_cfg.priv = priv;
+       rtc_nvmem_register(priv->rtc, &nvmem_cfg);
 
        priv->irq = spi->irq;
 
index ccfc9d43eb1e680956d32892e9bacfaddefafd1a..938512c676eed4247bc0e1e15f67f6d36ad54937 100644 (file)
@@ -66,7 +66,7 @@ static int ds1347_read_time(struct device *dev, struct rtc_time *dt)
        dt->tm_wday = bcd2bin(buf[5]) - 1;
        dt->tm_year = bcd2bin(buf[6]) + 100;
 
-       return rtc_valid_tm(dt);
+       return 0;
 }
 
 static int ds1347_set_time(struct device *dev, struct rtc_time *dt)
index 4d5b007d7fc68cfbe71cc6f4c577b5bc4cc357e3..3b095401f848906184ecfd6ef75644ca73eaead2 100644 (file)
@@ -153,7 +153,7 @@ static int ds1390_read_time(struct device *dev, struct rtc_time *dt)
        /* adjust for century bit */
        dt->tm_year = bcd2bin(chip->txrx_buf[6]) + ((chip->txrx_buf[5] & 0x80) ? 100 : 0);
 
-       return rtc_valid_tm(dt);
+       return 0;
 }
 
 static int ds1390_set_time(struct device *dev, struct rtc_time *dt)
index 1e95312a6f2eecdce510a7cdd79a2b59fa37f037..a7d5ca428d6828692e5dc79c67c76821ea440315 100644 (file)
@@ -277,10 +277,6 @@ static int ds1511_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm)
 
        rtc_tm->tm_mon--;
 
-       if (rtc_valid_tm(rtc_tm) < 0) {
-               dev_err(dev, "retrieved date/time is not valid.\n");
-               rtc_time_to_tm(0, rtc_tm);
-       }
        return 0;
 }
 
@@ -422,20 +418,20 @@ static int ds1511_nvram_write(void *priv, unsigned int pos, void *buf,
        return 0;
 }
 
-static struct nvmem_config ds1511_nvmem_cfg = {
-       .name = "ds1511_nvram",
-       .word_size = 1,
-       .stride = 1,
-       .size = DS1511_RAM_MAX,
-       .reg_read = ds1511_nvram_read,
-       .reg_write = ds1511_nvram_write,
-};
-
 static int ds1511_rtc_probe(struct platform_device *pdev)
 {
        struct resource *res;
        struct rtc_plat_data *pdata;
        int ret = 0;
+       struct nvmem_config ds1511_nvmem_cfg = {
+               .name = "ds1511_nvram",
+               .word_size = 1,
+               .stride = 1,
+               .size = DS1511_RAM_MAX,
+               .reg_read = ds1511_nvram_read,
+               .reg_write = ds1511_nvram_write,
+               .priv = &pdev->dev,
+       };
 
        pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
        if (!pdata)
@@ -478,14 +474,14 @@ static int ds1511_rtc_probe(struct platform_device *pdev)
 
        pdata->rtc->ops = &ds1511_rtc_ops;
 
-       ds1511_nvmem_cfg.priv = &pdev->dev;
-       pdata->rtc->nvmem_config = &ds1511_nvmem_cfg;
        pdata->rtc->nvram_old_abi = true;
 
        ret = rtc_register_device(pdata->rtc);
        if (ret)
                return ret;
 
+       rtc_nvmem_register(pdata->rtc, &ds1511_nvmem_cfg);
+
        /*
         * if the platform has an interrupt in mind for this device,
         * then by all means, set it
index 9961ec646fd2f0c2766cf809e7eecd9202085d51..2441b9a2b3662605183263507e75ce071dddf6ae 100644 (file)
@@ -127,10 +127,6 @@ static int ds1553_rtc_read_time(struct device *dev, struct rtc_time *tm)
        /* year is 1900 + tm->tm_year */
        tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
-       if (rtc_valid_tm(tm) < 0) {
-               dev_err(dev, "retrieved date/time is not valid.\n");
-               rtc_time_to_tm(0, tm);
-       }
        return 0;
 }
 
@@ -233,46 +229,32 @@ static const struct rtc_class_ops ds1553_rtc_ops = {
        .alarm_irq_enable       = ds1553_rtc_alarm_irq_enable,
 };
 
-static ssize_t ds1553_nvram_read(struct file *filp, struct kobject *kobj,
-                                struct bin_attribute *bin_attr,
-                                char *buf, loff_t pos, size_t size)
+static int ds1553_nvram_read(void *priv, unsigned int pos, void *val,
+                            size_t bytes)
 {
-       struct device *dev = container_of(kobj, struct device, kobj);
-       struct platform_device *pdev = to_platform_device(dev);
+       struct platform_device *pdev = priv;
        struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
        void __iomem *ioaddr = pdata->ioaddr;
-       ssize_t count;
+       u8 *buf = val;
 
-       for (count = 0; count < size; count++)
+       for (; bytes; bytes--)
                *buf++ = readb(ioaddr + pos++);
-       return count;
+       return 0;
 }
 
-static ssize_t ds1553_nvram_write(struct file *filp, struct kobject *kobj,
-                                 struct bin_attribute *bin_attr,
-                                 char *buf, loff_t pos, size_t size)
+static int ds1553_nvram_write(void *priv, unsigned int pos, void *val,
+                             size_t bytes)
 {
-       struct device *dev = container_of(kobj, struct device, kobj);
-       struct platform_device *pdev = to_platform_device(dev);
+       struct platform_device *pdev = priv;
        struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
        void __iomem *ioaddr = pdata->ioaddr;
-       ssize_t count;
+       u8 *buf = val;
 
-       for (count = 0; count < size; count++)
+       for (; bytes; bytes--)
                writeb(*buf++, ioaddr + pos++);
-       return count;
+       return 0;
 }
 
-static struct bin_attribute ds1553_nvram_attr = {
-       .attr = {
-               .name = "nvram",
-               .mode = S_IRUGO | S_IWUSR,
-       },
-       .size = RTC_OFFSET,
-       .read = ds1553_nvram_read,
-       .write = ds1553_nvram_write,
-};
-
 static int ds1553_rtc_probe(struct platform_device *pdev)
 {
        struct resource *res;
@@ -280,6 +262,15 @@ static int ds1553_rtc_probe(struct platform_device *pdev)
        struct rtc_plat_data *pdata;
        void __iomem *ioaddr;
        int ret = 0;
+       struct nvmem_config nvmem_cfg = {
+               .name = "ds1553_nvram",
+               .word_size = 1,
+               .stride = 1,
+               .size = RTC_OFFSET,
+               .reg_read = ds1553_nvram_read,
+               .reg_write = ds1553_nvram_write,
+               .priv = pdev,
+       };
 
        pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
        if (!pdata)
@@ -308,11 +299,17 @@ static int ds1553_rtc_probe(struct platform_device *pdev)
        pdata->last_jiffies = jiffies;
        platform_set_drvdata(pdev, pdata);
 
-       pdata->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
-                                 &ds1553_rtc_ops, THIS_MODULE);
+       pdata->rtc = devm_rtc_allocate_device(&pdev->dev);
        if (IS_ERR(pdata->rtc))
                return PTR_ERR(pdata->rtc);
 
+       pdata->rtc->ops = &ds1553_rtc_ops;
+       pdata->rtc->nvram_old_abi = true;
+
+       ret = rtc_register_device(pdata->rtc);
+       if (ret)
+               return ret;
+
        if (pdata->irq > 0) {
                writeb(0, ioaddr + RTC_INTERRUPTS);
                if (devm_request_irq(&pdev->dev, pdata->irq,
@@ -323,21 +320,9 @@ static int ds1553_rtc_probe(struct platform_device *pdev)
                }
        }
 
-       ret = sysfs_create_bin_file(&pdev->dev.kobj, &ds1553_nvram_attr);
-       if (ret)
-               dev_err(&pdev->dev, "unable to create sysfs file: %s\n",
-                       ds1553_nvram_attr.attr.name);
-
-       return 0;
-}
-
-static int ds1553_rtc_remove(struct platform_device *pdev)
-{
-       struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+       if (rtc_nvmem_register(pdata->rtc, &nvmem_cfg))
+               dev_err(&pdev->dev, "unable to register nvmem\n");
 
-       sysfs_remove_bin_file(&pdev->dev.kobj, &ds1553_nvram_attr);
-       if (pdata->irq > 0)
-               writeb(0, pdata->ioaddr + RTC_INTERRUPTS);
        return 0;
 }
 
@@ -346,7 +331,6 @@ MODULE_ALIAS("platform:rtc-ds1553");
 
 static struct platform_driver ds1553_rtc_driver = {
        .probe          = ds1553_rtc_probe,
-       .remove         = ds1553_rtc_remove,
        .driver         = {
                .name   = "rtc-ds1553",
        },
index ed43b431166064770c7486498f75a55a10fe5689..1a39829d2b40358b9313b5e79d1da10f2da5a105 100644 (file)
@@ -306,7 +306,7 @@ ds1685_rtc_read_time(struct device *dev, struct rtc_time *tm)
        tm->tm_yday  = rtc_year_days(tm->tm_mday, tm->tm_mon, tm->tm_year);
        tm->tm_isdst = 0; /* RTC has hardcoded timezone, so don't use. */
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 /**
index 3abf1cbfb8cee2f57e19f167f2adaf1df1cade17..2d781180e968a12950a2c5fcf9f2f7fa123aa13d 100644 (file)
@@ -53,9 +53,7 @@
 struct rtc_plat_data {
        void __iomem *ioaddr_nvram;
        void __iomem *ioaddr_rtc;
-       size_t size_nvram;
        unsigned long last_jiffies;
-       struct bin_attribute nvram_attr;
 };
 
 static int ds1742_rtc_set_time(struct device *dev, struct rtc_time *tm)
@@ -114,7 +112,7 @@ static int ds1742_rtc_read_time(struct device *dev, struct rtc_time *tm)
        /* year is 1900 + tm->tm_year */
        tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static const struct rtc_class_ops ds1742_rtc_ops = {
@@ -122,34 +120,28 @@ static const struct rtc_class_ops ds1742_rtc_ops = {
        .set_time       = ds1742_rtc_set_time,
 };
 
-static ssize_t ds1742_nvram_read(struct file *filp, struct kobject *kobj,
-                                struct bin_attribute *bin_attr,
-                                char *buf, loff_t pos, size_t size)
+static int ds1742_nvram_read(void *priv, unsigned int pos, void *val,
+                            size_t bytes)
 {
-       struct device *dev = container_of(kobj, struct device, kobj);
-       struct platform_device *pdev = to_platform_device(dev);
-       struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+       struct rtc_plat_data *pdata = priv;
        void __iomem *ioaddr = pdata->ioaddr_nvram;
-       ssize_t count;
+       u8 *buf = val;
 
-       for (count = 0; count < size; count++)
+       for (; bytes; bytes--)
                *buf++ = readb(ioaddr + pos++);
-       return count;
+       return 0;
 }
 
-static ssize_t ds1742_nvram_write(struct file *filp, struct kobject *kobj,
-                                 struct bin_attribute *bin_attr,
-                                 char *buf, loff_t pos, size_t size)
+static int ds1742_nvram_write(void *priv, unsigned int pos, void *val,
+                             size_t bytes)
 {
-       struct device *dev = container_of(kobj, struct device, kobj);
-       struct platform_device *pdev = to_platform_device(dev);
-       struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+       struct rtc_plat_data *pdata = priv;
        void __iomem *ioaddr = pdata->ioaddr_nvram;
-       ssize_t count;
+       u8 *buf = val;
 
-       for (count = 0; count < size; count++)
+       for (; bytes; bytes--)
                writeb(*buf++, ioaddr + pos++);
-       return count;
+       return 0;
 }
 
 static int ds1742_rtc_probe(struct platform_device *pdev)
@@ -160,6 +152,14 @@ static int ds1742_rtc_probe(struct platform_device *pdev)
        struct rtc_plat_data *pdata;
        void __iomem *ioaddr;
        int ret = 0;
+       struct nvmem_config nvmem_cfg = {
+               .name = "ds1742_nvram",
+               .word_size = 1,
+               .stride = 1,
+               .reg_read = ds1742_nvram_read,
+               .reg_write = ds1742_nvram_write,
+       };
+
 
        pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
        if (!pdata)
@@ -171,15 +171,10 @@ static int ds1742_rtc_probe(struct platform_device *pdev)
                return PTR_ERR(ioaddr);
 
        pdata->ioaddr_nvram = ioaddr;
-       pdata->size_nvram = resource_size(res) - RTC_SIZE;
-       pdata->ioaddr_rtc = ioaddr + pdata->size_nvram;
+       pdata->ioaddr_rtc = ioaddr + resource_size(res) - RTC_SIZE;
 
-       sysfs_bin_attr_init(&pdata->nvram_attr);
-       pdata->nvram_attr.attr.name = "nvram";
-       pdata->nvram_attr.attr.mode = S_IRUGO | S_IWUSR;
-       pdata->nvram_attr.read = ds1742_nvram_read;
-       pdata->nvram_attr.write = ds1742_nvram_write;
-       pdata->nvram_attr.size = pdata->size_nvram;
+       nvmem_cfg.size = resource_size(res) - RTC_SIZE;
+       nvmem_cfg.priv = pdata;
 
        /* turn RTC on if it was not on */
        ioaddr = pdata->ioaddr_rtc;
@@ -196,24 +191,21 @@ static int ds1742_rtc_probe(struct platform_device *pdev)
 
        pdata->last_jiffies = jiffies;
        platform_set_drvdata(pdev, pdata);
-       rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
-                                 &ds1742_rtc_ops, THIS_MODULE);
+
+       rtc = devm_rtc_allocate_device(&pdev->dev);
        if (IS_ERR(rtc))
                return PTR_ERR(rtc);
 
-       ret = sysfs_create_bin_file(&pdev->dev.kobj, &pdata->nvram_attr);
-       if (ret)
-               dev_err(&pdev->dev, "Unable to create sysfs entry: %s\n",
-                       pdata->nvram_attr.attr.name);
+       rtc->ops = &ds1742_rtc_ops;
+       rtc->nvram_old_abi = true;
 
-       return 0;
-}
+       ret = rtc_register_device(rtc);
+       if (ret)
+               return ret;
 
-static int ds1742_rtc_remove(struct platform_device *pdev)
-{
-       struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+       if (rtc_nvmem_register(rtc, &nvmem_cfg))
+               dev_err(&pdev->dev, "Unable to register nvmem\n");
 
-       sysfs_remove_bin_file(&pdev->dev.kobj, &pdata->nvram_attr);
        return 0;
 }
 
@@ -225,7 +217,6 @@ MODULE_DEVICE_TABLE(of, ds1742_rtc_of_match);
 
 static struct platform_driver ds1742_rtc_driver = {
        .probe          = ds1742_rtc_probe,
-       .remove         = ds1742_rtc_remove,
        .driver         = {
                .name   = "rtc-ds1742",
                .of_match_table = of_match_ptr(ds1742_rtc_of_match),
index 9a1582ed7070a0ffc9d75d86f353d8383b7594ab..b886b6a5c1785835f3d934500e31e0e2d61a15fc 100644 (file)
@@ -207,7 +207,7 @@ static int ds2404_read_time(struct device *dev, struct rtc_time *dt)
        time = le32_to_cpu(time);
 
        rtc_time_to_tm(time, dt);
-       return rtc_valid_tm(dt);
+       return 0;
 }
 
 static int ds2404_set_mmss(struct device *dev, unsigned long secs)
index 0550f7ba464f414d068985d1c58b2d0029754d26..7184e5145f12da07e01a837cef7bf1376cff75fa 100644 (file)
@@ -145,7 +145,7 @@ static int ds3232_read_time(struct device *dev, struct rtc_time *time)
 
        time->tm_year = bcd2bin(year) + add_century;
 
-       return rtc_valid_tm(time);
+       return 0;
 }
 
 static int ds3232_set_time(struct device *dev, struct rtc_time *time)
index 0130afd7fe889e5767660292c2d2f87d58ad2725..3454e7814524a3defaa3ef1c25da4132fa825bb7 100644 (file)
@@ -176,7 +176,7 @@ static int efi_read_time(struct device *dev, struct rtc_time *tm)
        if (!convert_from_efi_time(&eft, tm))
                return -EIO;
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int efi_set_time(struct device *dev, struct rtc_time *tm)
index 576eadbba296799eca4bc2ad45d13769799a7cf1..e1137670d4d27e846e4ecaecb020275205977230 100644 (file)
@@ -136,8 +136,7 @@ static int fm3130_get_time(struct device *dev, struct rtc_time *t)
                t->tm_hour, t->tm_mday,
                t->tm_mon, t->tm_year, t->tm_wday);
 
-       /* initial clock setting can be undefined */
-       return rtc_valid_tm(t);
+       return 0;
 }
 
 
index d67769265185929c53a48eba64100491f5622cce..a1c44d0c855780f8ba109e79c6bd62cb96dbb40d 100644 (file)
@@ -235,3 +235,5 @@ static struct platform_driver goldfish_rtc = {
 };
 
 module_platform_driver(goldfish_rtc);
+
+MODULE_LICENSE("GPL v2");
index 38586a024ee86f0e3854d534d875d071388a71a6..890ccfc9e5aabe6cd24f08ccacfb3496a054dfb3 100644 (file)
@@ -104,8 +104,9 @@ static int isl12022_write_reg(struct i2c_client *client,
  * In the routines that deal directly with the isl12022 hardware, we use
  * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch.
  */
-static int isl12022_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int isl12022_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+       struct i2c_client *client = to_i2c_client(dev);
        uint8_t buf[ISL12022_REG_INT + 1];
        int ret;
 
@@ -149,11 +150,12 @@ static int isl12022_get_datetime(struct i2c_client *client, struct rtc_time *tm)
                tm->tm_sec, tm->tm_min, tm->tm_hour,
                tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
-static int isl12022_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int isl12022_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+       struct i2c_client *client = to_i2c_client(dev);
        struct isl12022 *isl12022 = i2c_get_clientdata(client);
        size_t i;
        int ret;
@@ -199,7 +201,7 @@ static int isl12022_set_datetime(struct i2c_client *client, struct rtc_time *tm)
                                return ret;
                }
 
-               isl12022->write_enabled = 1;
+               isl12022->write_enabled = true;
        }
 
        /* hours, minutes and seconds */
@@ -228,16 +230,6 @@ static int isl12022_set_datetime(struct i2c_client *client, struct rtc_time *tm)
        return 0;
 }
 
-static int isl12022_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-       return isl12022_get_datetime(to_i2c_client(dev), tm);
-}
-
-static int isl12022_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-       return isl12022_set_datetime(to_i2c_client(dev), tm);
-}
-
 static const struct rtc_class_ops isl12022_rtc_ops = {
        .read_time      = isl12022_rtc_read_time,
        .set_time       = isl12022_rtc_set_time,
diff --git a/drivers/rtc/rtc-isl12026.c b/drivers/rtc/rtc-isl12026.c
new file mode 100644 (file)
index 0000000..97f594f
--- /dev/null
@@ -0,0 +1,501 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * An I2C driver for the Intersil ISL 12026
+ *
+ * Copyright (c) 2018 Cavium, Inc.
+ */
+#include <linux/bcd.h>
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/nvmem-provider.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/rtc.h>
+#include <linux/slab.h>
+
+/* register offsets */
+#define ISL12026_REG_PWR       0x14
+# define ISL12026_REG_PWR_BSW  BIT(6)
+# define ISL12026_REG_PWR_SBIB BIT(7)
+#define ISL12026_REG_SC                0x30
+#define ISL12026_REG_HR                0x32
+# define ISL12026_REG_HR_MIL   BIT(7)  /* military or 24 hour time */
+#define ISL12026_REG_SR                0x3f
+# define ISL12026_REG_SR_RTCF  BIT(0)
+# define ISL12026_REG_SR_WEL   BIT(1)
+# define ISL12026_REG_SR_RWEL  BIT(2)
+# define ISL12026_REG_SR_MBZ   BIT(3)
+# define ISL12026_REG_SR_OSCF  BIT(4)
+
+/* The EEPROM array responds at i2c address 0x57 */
+#define ISL12026_EEPROM_ADDR   0x57
+
+#define ISL12026_PAGESIZE 16
+#define ISL12026_NVMEM_WRITE_TIME 20
+
+struct isl12026 {
+       struct rtc_device *rtc;
+       struct i2c_client *nvm_client;
+};
+
+static int isl12026_read_reg(struct i2c_client *client, int reg)
+{
+       u8 addr[] = {0, reg};
+       u8 val;
+       int ret;
+
+       struct i2c_msg msgs[] = {
+               {
+                       .addr   = client->addr,
+                       .flags  = 0,
+                       .len    = sizeof(addr),
+                       .buf    = addr
+               }, {
+                       .addr   = client->addr,
+                       .flags  = I2C_M_RD,
+                       .len    = 1,
+                       .buf    = &val
+               }
+       };
+
+       ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+       if (ret != ARRAY_SIZE(msgs)) {
+               dev_err(&client->dev, "read reg error, ret=%d\n", ret);
+               ret = ret < 0 ? ret : -EIO;
+       } else {
+               ret = val;
+       }
+
+       return ret;
+}
+
+static int isl12026_arm_write(struct i2c_client *client)
+{
+       int ret;
+       u8 op[3];
+       struct i2c_msg msg = {
+               .addr   = client->addr,
+               .flags  = 0,
+               .len    = 1,
+               .buf    = op
+       };
+
+       /* Set SR.WEL */
+       op[0] = 0;
+       op[1] = ISL12026_REG_SR;
+       op[2] = ISL12026_REG_SR_WEL;
+       msg.len = 3;
+       ret = i2c_transfer(client->adapter, &msg, 1);
+       if (ret != 1) {
+               dev_err(&client->dev, "write error SR.WEL, ret=%d\n", ret);
+               ret = ret < 0 ? ret : -EIO;
+               goto out;
+       }
+
+       /* Set SR.WEL and SR.RWEL */
+       op[2] = ISL12026_REG_SR_WEL | ISL12026_REG_SR_RWEL;
+       msg.len = 3;
+       ret = i2c_transfer(client->adapter, &msg, 1);
+       if (ret != 1) {
+               dev_err(&client->dev,
+                       "write error SR.WEL|SR.RWEL, ret=%d\n", ret);
+               ret = ret < 0 ? ret : -EIO;
+               goto out;
+       } else {
+               ret = 0;
+       }
+out:
+       return ret;
+}
+
+static int isl12026_disarm_write(struct i2c_client *client)
+{
+       int ret;
+       u8 op[3] = {0, ISL12026_REG_SR, 0};
+       struct i2c_msg msg = {
+               .addr   = client->addr,
+               .flags  = 0,
+               .len    = sizeof(op),
+               .buf    = op
+       };
+
+       ret = i2c_transfer(client->adapter, &msg, 1);
+       if (ret != 1) {
+               dev_err(&client->dev,
+                       "write error SR, ret=%d\n", ret);
+               ret = ret < 0 ? ret : -EIO;
+       } else {
+               ret = 0;
+       }
+
+       return ret;
+}
+
+static int isl12026_write_reg(struct i2c_client *client, int reg, u8 val)
+{
+       int ret;
+       u8 op[3] = {0, reg, val};
+       struct i2c_msg msg = {
+               .addr   = client->addr,
+               .flags  = 0,
+               .len    = sizeof(op),
+               .buf    = op
+       };
+
+       ret = isl12026_arm_write(client);
+       if (ret)
+               return ret;
+
+       ret = i2c_transfer(client->adapter, &msg, 1);
+       if (ret != 1) {
+               dev_err(&client->dev, "write error CCR, ret=%d\n", ret);
+               ret = ret < 0 ? ret : -EIO;
+               goto out;
+       }
+
+       msleep(ISL12026_NVMEM_WRITE_TIME);
+
+       ret = isl12026_disarm_write(client);
+out:
+       return ret;
+}
+
+/*
+ * Program the clock/control registers (CCR) from @tm in one burst
+ * write.  The CCR block is write-protected, so the transfer is
+ * bracketed by isl12026_arm_write()/isl12026_disarm_write().
+ * Time is stored in BCD; 24-hour mode is forced via the MIL bit.
+ */
+static int isl12026_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       int ret;
+       u8 op[10];
+       struct i2c_msg msg = {
+               .addr   = client->addr,
+               .flags  = 0,
+               .len    = sizeof(op),
+               .buf    = op
+       };
+
+       ret = isl12026_arm_write(client);
+       if (ret)
+               return ret;
+
+       /* Set the CCR registers */
+       op[0] = 0;                       /* register address, high byte */
+       op[1] = ISL12026_REG_SC;         /* register address, low byte */
+       op[2] = bin2bcd(tm->tm_sec); /* SC */
+       op[3] = bin2bcd(tm->tm_min); /* MN */
+       op[4] = bin2bcd(tm->tm_hour) | ISL12026_REG_HR_MIL; /* HR */
+       op[5] = bin2bcd(tm->tm_mday); /* DT */
+       op[6] = bin2bcd(tm->tm_mon + 1); /* MO */
+       op[7] = bin2bcd(tm->tm_year % 100); /* YR */
+       op[8] = bin2bcd(tm->tm_wday & 7); /* DW */
+       op[9] = bin2bcd(tm->tm_year >= 100 ? 20 : 19); /* Y2K: century */
+       ret = i2c_transfer(client->adapter, &msg, 1);
+       if (ret != 1) {
+               dev_err(&client->dev, "write error CCR, ret=%d\n", ret);
+               ret = ret < 0 ? ret : -EIO;
+               goto out;
+       }
+
+       ret = isl12026_disarm_write(client);
+out:
+       return ret;
+}
+
+/*
+ * Read the current time.  Two write-then-read transactions are used:
+ * first the status register (to warn about RTC/oscillator failure
+ * flags), then the 8-byte CCR block, which is decoded from BCD.
+ */
+static int isl12026_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       u8 ccr[8];
+       u8 addr[2];       /* 2-byte register address, MSB first */
+       u8 sr;
+       int ret;
+       struct i2c_msg msgs[] = {
+               {
+                       .addr   = client->addr,
+                       .flags  = 0,
+                       .len    = sizeof(addr),
+                       .buf    = addr
+               }, {
+                       .addr   = client->addr,
+                       .flags  = I2C_M_RD,
+               }
+       };
+
+       /* First, read SR */
+       addr[0] = 0;
+       addr[1] = ISL12026_REG_SR;
+       msgs[1].len = 1;
+       msgs[1].buf = &sr;
+
+       ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+       if (ret != ARRAY_SIZE(msgs)) {
+               dev_err(&client->dev, "read error, ret=%d\n", ret);
+               ret = ret < 0 ? ret : -EIO;
+               goto out;
+       }
+
+       /* failure flags are only reported, the read still proceeds */
+       if (sr & ISL12026_REG_SR_RTCF)
+               dev_warn(&client->dev, "Real-Time Clock Failure on read\n");
+       if (sr & ISL12026_REG_SR_OSCF)
+               dev_warn(&client->dev, "Oscillator Failure on read\n");
+
+       /* Second, CCR regs */
+       addr[0] = 0;
+       addr[1] = ISL12026_REG_SC;
+       msgs[1].len = sizeof(ccr);
+       msgs[1].buf = ccr;
+
+       ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+       if (ret != ARRAY_SIZE(msgs)) {
+               dev_err(&client->dev, "read error, ret=%d\n", ret);
+               ret = ret < 0 ? ret : -EIO;
+               goto out;
+       }
+
+       tm->tm_sec = bcd2bin(ccr[0] & 0x7F);
+       tm->tm_min = bcd2bin(ccr[1] & 0x7F);
+       if (ccr[2] & ISL12026_REG_HR_MIL)
+               tm->tm_hour = bcd2bin(ccr[2] & 0x3F);
+       else
+               /* 12-hour mode: bit 5 presumably AM/PM — confirm vs datasheet */
+               tm->tm_hour = bcd2bin(ccr[2] & 0x1F) +
+                       ((ccr[2] & 0x20) ? 12 : 0);
+       tm->tm_mday = bcd2bin(ccr[3] & 0x3F);
+       tm->tm_mon = bcd2bin(ccr[4] & 0x1F) - 1;
+       tm->tm_year = bcd2bin(ccr[5]);
+       if (bcd2bin(ccr[7]) == 20)   /* Y2K register selects the century */
+               tm->tm_year += 100;
+       tm->tm_wday = ccr[6] & 0x07;
+
+       ret = 0;
+out:
+       return ret;
+}
+
+/* Only time read/set are implemented; no alarm callbacks */
+static const struct rtc_class_ops isl12026_rtc_ops = {
+       .read_time      = isl12026_rtc_read_time,
+       .set_time       = isl12026_rtc_set_time,
+};
+
+/*
+ * nvmem read callback: fetch @bytes from the EEPROM (separate i2c
+ * address, reached through the dummy nvm_client) starting at @offset.
+ * Serialized against RTC operations via the rtc ops_lock.
+ */
+static int isl12026_nvm_read(void *p, unsigned int offset,
+                            void *val, size_t bytes)
+{
+       struct isl12026 *priv = p;
+       int ret;
+       u8 addr[2];
+       struct i2c_msg msgs[] = {
+               {
+                       .addr   = priv->nvm_client->addr,
+                       .flags  = 0,
+                       .len    = sizeof(addr),
+                       .buf    = addr
+               }, {
+                       .addr   = priv->nvm_client->addr,
+                       .flags  = I2C_M_RD,
+                       .buf    = val
+               }
+       };
+
+       /*
+        * offset and bytes checked and limited by nvmem core, so
+        * proceed without further checks.
+        */
+       ret = mutex_lock_interruptible(&priv->rtc->ops_lock);
+       if (ret)
+               return ret;
+
+       /* 2 bytes of address, most significant first */
+       addr[0] = offset >> 8;
+       addr[1] = offset;
+       msgs[1].len = bytes;
+       ret = i2c_transfer(priv->nvm_client->adapter, msgs, ARRAY_SIZE(msgs));
+
+       mutex_unlock(&priv->rtc->ops_lock);
+
+       if (ret != ARRAY_SIZE(msgs)) {
+               dev_err(&priv->nvm_client->dev,
+                       "nvmem read error, ret=%d\n", ret);
+               return ret < 0 ? ret : -EIO;
+       }
+
+       return 0;
+}
+
+/*
+ * nvmem write callback: write @bytes to the EEPROM in page-sized
+ * chunks, never crossing a page boundary, sleeping after each chunk
+ * for the device's write-cycle time.  Serialized via the rtc ops_lock.
+ */
+static int isl12026_nvm_write(void *p, unsigned int offset,
+                             void *val, size_t bytes)
+{
+       struct isl12026 *priv = p;
+       int ret;
+       u8 *v = val;
+       size_t chunk_size, num_written;
+       u8 payload[ISL12026_PAGESIZE + 2]; /* page + 2 address bytes */
+       struct i2c_msg msgs[] = {
+               {
+                       .addr   = priv->nvm_client->addr,
+                       .flags  = 0,
+                       .buf    = payload
+               }
+       };
+
+       /*
+        * offset and bytes checked and limited by nvmem core, so
+        * proceed without further checks.
+        */
+       ret = mutex_lock_interruptible(&priv->rtc->ops_lock);
+       if (ret)
+               return ret;
+
+       num_written = 0;
+       while (bytes) {
+               /* distance from offset to the end of its page */
+               chunk_size = round_down(offset, ISL12026_PAGESIZE) +
+                       ISL12026_PAGESIZE - offset;
+               chunk_size = min(bytes, chunk_size);
+               /*
+                * 2 bytes of address, most significant first, followed
+                * by page data bytes
+                */
+               memcpy(payload + 2, v + num_written, chunk_size);
+               payload[0] = offset >> 8;
+               payload[1] = offset;
+               msgs[0].len = chunk_size + 2;
+               ret = i2c_transfer(priv->nvm_client->adapter,
+                                  msgs, ARRAY_SIZE(msgs));
+               if (ret != ARRAY_SIZE(msgs)) {
+                       dev_err(&priv->nvm_client->dev,
+                               "nvmem write error, ret=%d\n", ret);
+                       ret = ret < 0 ? ret : -EIO;
+                       break;
+               }
+               ret = 0;
+               bytes -= chunk_size;
+               offset += chunk_size;
+               num_written += chunk_size;
+               msleep(ISL12026_NVMEM_WRITE_TIME);
+       }
+
+       mutex_unlock(&priv->rtc->ops_lock);
+
+       return ret;
+}
+
+/*
+ * Apply DT-requested overrides for the PWR.BSW and PWR.SBIB bits.
+ * Missing DT properties leave the corresponding bit untouched; any
+ * i2c failure is reported but does not fail the probe (best effort).
+ */
+static void isl12026_force_power_modes(struct i2c_client *client)
+{
+       int ret;
+       int pwr, requested_pwr;
+       u32 bsw_val, sbib_val;
+       bool set_bsw, set_sbib;
+
+       /*
+        * If we can read the of_property, set the specified value.
+        * If there is an error reading the of_property (likely
+        * because it does not exist), keep the current value.
+        */
+       ret = of_property_read_u32(client->dev.of_node,
+                                  "isil,pwr-bsw", &bsw_val);
+       set_bsw = (ret == 0);
+
+       ret = of_property_read_u32(client->dev.of_node,
+                                  "isil,pwr-sbib", &sbib_val);
+       set_sbib = (ret == 0);
+
+       /* Check if PWR.BSW and/or PWR.SBIB need specified values */
+       if (!set_bsw && !set_sbib)
+               return;
+
+       pwr = isl12026_read_reg(client, ISL12026_REG_PWR);
+       if (pwr < 0) {
+               dev_warn(&client->dev, "Error: Failed to read PWR %d\n", pwr);
+               return;
+       }
+
+       requested_pwr = pwr;
+
+       if (set_bsw) {
+               if (bsw_val)
+                       requested_pwr |= ISL12026_REG_PWR_BSW;
+               else
+                       requested_pwr &= ~ISL12026_REG_PWR_BSW;
+       } /* else keep current BSW */
+
+       if (set_sbib) {
+               if (sbib_val)
+                       requested_pwr |= ISL12026_REG_PWR_SBIB;
+               else
+                       requested_pwr &= ~ISL12026_REG_PWR_SBIB;
+       } /* else keep current SBIB */
+
+       /* pwr is known >= 0 here, so only compare against the request */
+       if (pwr != requested_pwr) {
+               dev_dbg(&client->dev, "PWR: %02x\n", pwr);
+               dev_dbg(&client->dev, "Updating PWR to: %02x\n", requested_pwr);
+               /* report (but tolerate) a failed update instead of
+                * silently discarding the return value */
+               ret = isl12026_write_reg(client, ISL12026_REG_PWR,
+                                        requested_pwr);
+               if (ret)
+                       dev_warn(&client->dev,
+                                "Error: Failed to write PWR %d\n", ret);
+       }
+}
+
+/*
+ * Probe: allocate driver state, apply DT power-mode overrides,
+ * create a dummy i2c client for the EEPROM address, then register
+ * the RTC and its nvmem area.
+ *
+ * Note: .remove only runs after a successful probe, so the dummy
+ * nvm_client must be unregistered here on every later error path
+ * (the original code leaked it).
+ */
+static int isl12026_probe_new(struct i2c_client *client)
+{
+       struct isl12026 *priv;
+       int ret;
+       struct nvmem_config nvm_cfg = {
+               .name = "isl12026-",
+               .base_dev = &client->dev,
+               .stride = 1,
+               .word_size = 1,
+               .size = 512,
+               .reg_read = isl12026_nvm_read,
+               .reg_write = isl12026_nvm_write,
+       };
+
+       if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+               return -ENODEV;
+
+       priv = devm_kzalloc(&client->dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       i2c_set_clientdata(client, priv);
+
+       isl12026_force_power_modes(client);
+
+       priv->nvm_client = i2c_new_dummy(client->adapter, ISL12026_EEPROM_ADDR);
+       if (!priv->nvm_client)
+               return -ENOMEM;
+
+       priv->rtc = devm_rtc_allocate_device(&client->dev);
+       ret = PTR_ERR_OR_ZERO(priv->rtc);
+       if (ret)
+               goto err_unreg_nvm;
+
+       priv->rtc->ops = &isl12026_rtc_ops;
+       nvm_cfg.priv = priv;
+       ret = rtc_nvmem_register(priv->rtc, &nvm_cfg);
+       if (ret)
+               goto err_unreg_nvm;
+
+       ret = rtc_register_device(priv->rtc);
+       if (ret)
+               goto err_unreg_nvm;
+
+       return 0;
+
+err_unreg_nvm:
+       i2c_unregister_device(priv->nvm_client);
+       return ret;
+}
+
+/* Tear down: release the dummy EEPROM client created at probe time */
+static int isl12026_remove(struct i2c_client *client)
+{
+       struct isl12026 *priv = i2c_get_clientdata(client);
+
+       i2c_unregister_device(priv->nvm_client);
+       return 0;
+}
+
+/* Device-tree match table (OF binding only) */
+static const struct of_device_id isl12026_dt_match[] = {
+       { .compatible = "isil,isl12026" },
+       { }
+};
+MODULE_DEVICE_TABLE(of, isl12026_dt_match);
+
+/* I2C driver glue; uses the id-less .probe_new entry point */
+static struct i2c_driver isl12026_driver = {
+       .driver         = {
+               .name   = "rtc-isl12026",
+               .of_match_table = isl12026_dt_match,
+       },
+       .probe_new      = isl12026_probe_new,
+       .remove         = isl12026_remove,
+};
+
+module_i2c_driver(isl12026_driver);
+
+MODULE_DESCRIPTION("ISL 12026 RTC driver");
+MODULE_LICENSE("GPL");
index 8dd299c6a1f338b522f524ee519e5892bf90fe46..1a2c38cc01785c573c5bfeccbf332290020296a9 100644 (file)
@@ -459,6 +459,11 @@ isl1208_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
        }
 
        /* clear WRTC again */
+       sr = isl1208_i2c_get_sr(client);
+       if (sr < 0) {
+               dev_err(&client->dev, "%s: reading SR failed\n", __func__);
+               return sr;
+       }
        sr = i2c_smbus_write_byte_data(client, ISL1208_REG_SR,
                                       sr & ~ISL1208_REG_SR_WRTC);
        if (sr < 0) {
@@ -630,29 +635,12 @@ isl1208_probe(struct i2c_client *client, const struct i2c_device_id *id)
        if (isl1208_i2c_validate_client(client) < 0)
                return -ENODEV;
 
-       if (client->irq > 0) {
-               rc = devm_request_threaded_irq(&client->dev, client->irq, NULL,
-                                              isl1208_rtc_interrupt,
-                                              IRQF_SHARED | IRQF_ONESHOT,
-                                              isl1208_driver.driver.name,
-                                              client);
-               if (!rc) {
-                       device_init_wakeup(&client->dev, 1);
-                       enable_irq_wake(client->irq);
-               } else {
-                       dev_err(&client->dev,
-                               "Unable to request irq %d, no alarm support\n",
-                               client->irq);
-                       client->irq = 0;
-               }
-       }
-
-       rtc = devm_rtc_device_register(&client->dev, isl1208_driver.driver.name,
-                                 &isl1208_rtc_ops,
-                                 THIS_MODULE);
+       rtc = devm_rtc_allocate_device(&client->dev);
        if (IS_ERR(rtc))
                return PTR_ERR(rtc);
 
+       rtc->ops = &isl1208_rtc_ops;
+
        i2c_set_clientdata(client, rtc);
 
        rc = isl1208_i2c_get_sr(client);
@@ -669,7 +657,24 @@ isl1208_probe(struct i2c_client *client, const struct i2c_device_id *id)
        if (rc)
                return rc;
 
-       return 0;
+       if (client->irq > 0) {
+               rc = devm_request_threaded_irq(&client->dev, client->irq, NULL,
+                                              isl1208_rtc_interrupt,
+                                              IRQF_SHARED | IRQF_ONESHOT,
+                                              isl1208_driver.driver.name,
+                                              client);
+               if (!rc) {
+                       device_init_wakeup(&client->dev, 1);
+                       enable_irq_wake(client->irq);
+               } else {
+                       dev_err(&client->dev,
+                               "Unable to request irq %d, no alarm support\n",
+                               client->irq);
+                       client->irq = 0;
+               }
+       }
+
+       return rtc_register_device(rtc);
 }
 
 static int
index ff65a7d2b9c9366c689d2efa09b5cc99ec3b5735..d0a891777f442b41981e22de3d1021ce5d7a66a9 100644 (file)
@@ -173,7 +173,7 @@ static int jz4740_rtc_read_time(struct device *dev, struct rtc_time *time)
 
        rtc_time_to_tm(secs, time);
 
-       return rtc_valid_tm(time);
+       return 0;
 }
 
 static int jz4740_rtc_set_mmss(struct device *dev, unsigned long secs)
index 1ae7da5cfc608f6c4bcb3764b00683e2a54b7919..4a3c0f3aab1490feef3111c667d21dcfee4e5e62 100644 (file)
@@ -52,13 +52,11 @@ EXPORT_SYMBOL(rtc_year_days);
  */
 void rtc_time64_to_tm(time64_t time, struct rtc_time *tm)
 {
-       unsigned int month, year;
-       unsigned long secs;
+       unsigned int month, year, secs;
        int days;
 
        /* time must be positive */
-       days = div_s64(time, 86400);
-       secs = time - (unsigned int) days * 86400;
+       days = div_s64_rem(time, 86400, &secs);
 
        /* day of the week, 1970-01-01 was a Thursday */
        tm->tm_wday = (days + 4) % 7;
@@ -67,7 +65,7 @@ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm)
        days -= (year - 1970) * 365
                + LEAPS_THRU_END_OF(year - 1)
                - LEAPS_THRU_END_OF(1970 - 1);
-       if (days < 0) {
+       while (days < 0) {
                year -= 1;
                days += 365 + is_leap_year(year);
        }
index 59d99596fdebc83981cb23cbad13a9881ab5cb3e..14dc7b04fae04cd5d38933675493529b36b1ef25 100644 (file)
@@ -110,7 +110,7 @@ static int lpc24xx_rtc_read_time(struct device *dev, struct rtc_time *tm)
        tm->tm_year = CT1_YEAR(ct1);
        tm->tm_yday = CT2_DOY(ct2);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int lpc24xx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
index 887871c3d5261432e745c61c1fbfe8b50d95ff80..3ba87239aacced31f0fd5f5ac0a2f63ce01bdd0b 100644 (file)
@@ -70,7 +70,7 @@ static int lpc32xx_rtc_read_time(struct device *dev, struct rtc_time *time)
        elapsed_sec = rtc_readl(rtc, LPC32XX_RTC_UCOUNT);
        rtc_time_to_tm(elapsed_sec, time);
 
-       return rtc_valid_tm(time);
+       return 0;
 }
 
 static int lpc32xx_rtc_set_mmss(struct device *dev, unsigned long secs)
index e04ca54f21e20791fbf393de0154f4168b3c6026..045af1135e48a4049992ce7e8c176a116fb0f478 100644 (file)
@@ -98,7 +98,7 @@ static int ls1x_rtc_read_time(struct device *dev, struct rtc_time *rtm)
                        ls1x_get_min(v), ls1x_get_sec(v));
        rtc_time_to_tm(t, rtm);
 
-       return rtc_valid_tm(rtm);
+       return 0;
 }
 
 static int ls1x_rtc_set_time(struct device *dev, struct  rtc_time *rtm)
index c90fba3ed861881c0c813361dfaece9efac60938..ad03e2f12f5d3abacb16174e9dbffc2ad34d8fc4 100644 (file)
@@ -73,7 +73,6 @@
 #define M41T80_FEATURE_WD      BIT(3)  /* Extra watchdog resolution */
 #define M41T80_FEATURE_SQ_ALT  BIT(4)  /* RSx bits are in reg 4 */
 
-static DEFINE_MUTEX(m41t80_rtc_mutex);
 static const struct i2c_device_id m41t80_id[] = {
        { "m41t62", M41T80_FEATURE_SQ | M41T80_FEATURE_SQ_ALT },
        { "m41t65", M41T80_FEATURE_HT | M41T80_FEATURE_WD },
@@ -199,9 +198,9 @@ static irqreturn_t m41t80_handle_irq(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
-static int m41t80_get_datetime(struct i2c_client *client,
-                              struct rtc_time *tm)
+static int m41t80_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+       struct i2c_client *client = to_i2c_client(dev);
        unsigned char buf[8];
        int err, flags;
 
@@ -230,12 +229,12 @@ static int m41t80_get_datetime(struct i2c_client *client,
 
        /* assume 20YY not 19YY, and ignore the Century Bit */
        tm->tm_year = bcd2bin(buf[M41T80_REG_YEAR]) + 100;
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
-/* Sets the given date and time to the real time clock. */
-static int m41t80_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int m41t80_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+       struct i2c_client *client = to_i2c_client(dev);
        struct m41t80_data *clientdata = i2c_get_clientdata(client);
        unsigned char buf[8];
        int err, flags;
@@ -298,16 +297,6 @@ static int m41t80_rtc_proc(struct device *dev, struct seq_file *seq)
        return 0;
 }
 
-static int m41t80_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-       return m41t80_get_datetime(to_i2c_client(dev), tm);
-}
-
-static int m41t80_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-       return m41t80_set_datetime(to_i2c_client(dev), tm);
-}
-
 static int m41t80_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
        struct i2c_client *client = to_i2c_client(dev);
@@ -598,6 +587,7 @@ static struct clk *m41t80_sqw_register_clk(struct m41t80_data *m41t80)
  *
  *****************************************************************************
  */
+static DEFINE_MUTEX(m41t80_rtc_mutex);
 static struct i2c_client *save_client;
 
 /* Default margin */
@@ -885,7 +875,6 @@ static int m41t80_probe(struct i2c_client *client,
 {
        struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
        int rc = 0;
-       struct rtc_device *rtc = NULL;
        struct rtc_time tm;
        struct m41t80_data *m41t80_data = NULL;
        bool wakeup_source = false;
@@ -909,6 +898,10 @@ static int m41t80_probe(struct i2c_client *client,
                m41t80_data->features = id->driver_data;
        i2c_set_clientdata(client, m41t80_data);
 
+       m41t80_data->rtc =  devm_rtc_allocate_device(&client->dev);
+       if (IS_ERR(m41t80_data->rtc))
+               return PTR_ERR(m41t80_data->rtc);
+
 #ifdef CONFIG_OF
        wakeup_source = of_property_read_bool(client->dev.of_node,
                                              "wakeup-source");
@@ -932,15 +925,11 @@ static int m41t80_probe(struct i2c_client *client,
                device_init_wakeup(&client->dev, true);
        }
 
-       rtc = devm_rtc_device_register(&client->dev, client->name,
-                                      &m41t80_rtc_ops, THIS_MODULE);
-       if (IS_ERR(rtc))
-               return PTR_ERR(rtc);
+       m41t80_data->rtc->ops = &m41t80_rtc_ops;
 
-       m41t80_data->rtc = rtc;
        if (client->irq <= 0) {
                /* We cannot support UIE mode if we do not have an IRQ line */
-               rtc->uie_unsupported = 1;
+               m41t80_data->rtc->uie_unsupported = 1;
        }
 
        /* Make sure HT (Halt Update) bit is cleared */
@@ -948,7 +937,7 @@ static int m41t80_probe(struct i2c_client *client,
 
        if (rc >= 0 && rc & M41T80_ALHOUR_HT) {
                if (m41t80_data->features & M41T80_FEATURE_HT) {
-                       m41t80_get_datetime(client, &tm);
+                       m41t80_rtc_read_time(&client->dev, &tm);
                        dev_info(&client->dev, "HT bit was set!\n");
                        dev_info(&client->dev,
                                 "Power Down at %04i-%02i-%02i %02i:%02i:%02i\n",
@@ -993,6 +982,11 @@ static int m41t80_probe(struct i2c_client *client,
        if (m41t80_data->features & M41T80_FEATURE_SQ)
                m41t80_sqw_register_clk(m41t80_data);
 #endif
+
+       rc = rtc_register_device(m41t80_data->rtc);
+       if (rc)
+               return rc;
+
        return 0;
 }
 
index 5ac45fc1a7873f2a7a44560980b8c47152ff146a..4a08a9dabc82d2ec1de0c601bd5e704b08426abc 100644 (file)
@@ -159,7 +159,7 @@ static int m41t93_get_time(struct device *dev, struct rtc_time *tm)
                tm->tm_hour, tm->tm_mday,
                tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-       return ret < 0 ? ret : rtc_valid_tm(tm);
+       return ret;
 }
 
 
index 1f0eb79e69f90e279c33be8efc771a357c7ede91..bab82b4be35681ba23463e8112cccd5230428dd2 100644 (file)
@@ -99,8 +99,7 @@ static int m41t94_read_time(struct device *dev, struct rtc_time *tm)
                tm->tm_hour, tm->tm_mday,
                tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-       /* initial clock setting can be undefined */
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static const struct rtc_class_ops m41t94_rtc_ops = {
index 810f4ea481e4dc4c8f0be4123656f1c5700a13fe..0cf6507de3c728868329d89d52c523b30ae8d36b 100644 (file)
@@ -84,7 +84,7 @@ static int m48t35_read_time(struct device *dev, struct rtc_time *tm)
                tm->tm_year += 100;
 
        tm->tm_mon--;
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int m48t35_set_time(struct device *dev, struct rtc_time *tm)
index d99a705bec07ac28103bc1d19ce52b97ffd25385..216fac62c888e94da57c1b5aa9e97e30237a350f 100644 (file)
@@ -105,7 +105,7 @@ static int m48t59_rtc_read_time(struct device *dev, struct rtc_time *tm)
        dev_dbg(dev, "RTC read time %04d-%02d-%02d %02d/%02d/%02d\n",
                tm->tm_year + 1900, tm->tm_mon, tm->tm_mday,
                tm->tm_hour, tm->tm_min, tm->tm_sec);
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int m48t59_rtc_set_time(struct device *dev, struct rtc_time *tm)
@@ -334,16 +334,16 @@ static const struct rtc_class_ops m48t02_rtc_ops = {
        .set_time       = m48t59_rtc_set_time,
 };
 
-static ssize_t m48t59_nvram_read(struct file *filp, struct kobject *kobj,
-                               struct bin_attribute *bin_attr,
-                               char *buf, loff_t pos, size_t size)
+static int m48t59_nvram_read(void *priv, unsigned int offset, void *val,
+                            size_t size)
 {
-       struct device *dev = container_of(kobj, struct device, kobj);
-       struct platform_device *pdev = to_platform_device(dev);
+       struct platform_device *pdev = priv;
+       struct device *dev = &pdev->dev;
        struct m48t59_plat_data *pdata = dev_get_platdata(&pdev->dev);
        struct m48t59_private *m48t59 = platform_get_drvdata(pdev);
        ssize_t cnt = 0;
        unsigned long flags;
+       u8 *buf = val;
 
        spin_lock_irqsave(&m48t59->lock, flags);
 
@@ -352,19 +352,19 @@ static ssize_t m48t59_nvram_read(struct file *filp, struct kobject *kobj,
 
        spin_unlock_irqrestore(&m48t59->lock, flags);
 
-       return cnt;
+       return 0;
 }
 
-static ssize_t m48t59_nvram_write(struct file *filp, struct kobject *kobj,
-                               struct bin_attribute *bin_attr,
-                               char *buf, loff_t pos, size_t size)
+static int m48t59_nvram_write(void *priv, unsigned int offset, void *val,
+                             size_t size)
 {
-       struct device *dev = container_of(kobj, struct device, kobj);
-       struct platform_device *pdev = to_platform_device(dev);
+       struct platform_device *pdev = priv;
+       struct device *dev = &pdev->dev;
        struct m48t59_plat_data *pdata = dev_get_platdata(&pdev->dev);
        struct m48t59_private *m48t59 = platform_get_drvdata(pdev);
        ssize_t cnt = 0;
        unsigned long flags;
+       u8 *buf = val;
 
        spin_lock_irqsave(&m48t59->lock, flags);
 
@@ -373,18 +373,9 @@ static ssize_t m48t59_nvram_write(struct file *filp, struct kobject *kobj,
 
        spin_unlock_irqrestore(&m48t59->lock, flags);
 
-       return cnt;
+       return 0;
 }
 
-static struct bin_attribute m48t59_nvram_attr = {
-       .attr = {
-               .name = "nvram",
-               .mode = S_IRUGO | S_IWUSR,
-       },
-       .read = m48t59_nvram_read,
-       .write = m48t59_nvram_write,
-};
-
 static int m48t59_rtc_probe(struct platform_device *pdev)
 {
        struct m48t59_plat_data *pdata = dev_get_platdata(&pdev->dev);
@@ -393,6 +384,14 @@ static int m48t59_rtc_probe(struct platform_device *pdev)
        int ret = -ENOMEM;
        char *name;
        const struct rtc_class_ops *ops;
+       struct nvmem_config nvmem_cfg = {
+               .name = "m48t59-",
+               .word_size = 1,
+               .stride = 1,
+               .reg_read = m48t59_nvram_read,
+               .reg_write = m48t59_nvram_write,
+               .priv = pdev,
+       };
 
        /* This chip could be memory-mapped or I/O-mapped */
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -480,23 +479,22 @@ static int m48t59_rtc_probe(struct platform_device *pdev)
        spin_lock_init(&m48t59->lock);
        platform_set_drvdata(pdev, m48t59);
 
-       m48t59->rtc = devm_rtc_device_register(&pdev->dev, name, ops,
-                                               THIS_MODULE);
+       m48t59->rtc = devm_rtc_allocate_device(&pdev->dev);
        if (IS_ERR(m48t59->rtc))
                return PTR_ERR(m48t59->rtc);
 
-       m48t59_nvram_attr.size = pdata->offset;
+       m48t59->rtc->nvram_old_abi = true;
+       m48t59->rtc->ops = ops;
 
-       ret = sysfs_create_bin_file(&pdev->dev.kobj, &m48t59_nvram_attr);
+       nvmem_cfg.size = pdata->offset;
+       ret = rtc_nvmem_register(m48t59->rtc, &nvmem_cfg);
        if (ret)
                return ret;
 
-       return 0;
-}
+       ret = rtc_register_device(m48t59->rtc);
+       if (ret)
+               return ret;
 
-static int m48t59_rtc_remove(struct platform_device *pdev)
-{
-       sysfs_remove_bin_file(&pdev->dev.kobj, &m48t59_nvram_attr);
        return 0;
 }
 
@@ -508,7 +506,6 @@ static struct platform_driver m48t59_rtc_driver = {
                .name   = "rtc-m48t59",
        },
        .probe          = m48t59_rtc_probe,
-       .remove         = m48t59_rtc_remove,
 };
 
 module_platform_driver(m48t59_rtc_driver);
index d9aea9b6d9cd9189767e8ff074d1c78ffabf9ade..a9533535c3b7d0d97140f30863147e97bb55cde0 100644 (file)
@@ -100,7 +100,7 @@ static int m48t86_rtc_read_time(struct device *dev, struct rtc_time *tm)
                if (m48t86_readb(dev, M48T86_HOUR) & 0x80)
                        tm->tm_hour += 12;
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int m48t86_rtc_set_time(struct device *dev, struct rtc_time *tm)
@@ -218,21 +218,21 @@ static bool m48t86_verify_chip(struct platform_device *pdev)
        return false;
 }
 
-static struct nvmem_config m48t86_nvmem_cfg = {
-       .name = "m48t86_nvram",
-       .word_size = 1,
-       .stride = 1,
-       .size = M48T86_NVRAM_LEN,
-       .reg_read = m48t86_nvram_read,
-       .reg_write = m48t86_nvram_write,
-};
-
 static int m48t86_rtc_probe(struct platform_device *pdev)
 {
        struct m48t86_rtc_info *info;
        struct resource *res;
        unsigned char reg;
        int err;
+       struct nvmem_config m48t86_nvmem_cfg = {
+               .name = "m48t86_nvram",
+               .word_size = 1,
+               .stride = 1,
+               .size = M48T86_NVRAM_LEN,
+               .reg_read = m48t86_nvram_read,
+               .reg_write = m48t86_nvram_write,
+               .priv = &pdev->dev,
+       };
 
        info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
        if (!info)
@@ -264,15 +264,14 @@ static int m48t86_rtc_probe(struct platform_device *pdev)
                return PTR_ERR(info->rtc);
 
        info->rtc->ops = &m48t86_rtc_ops;
-
-       m48t86_nvmem_cfg.priv = &pdev->dev;
-       info->rtc->nvmem_config = &m48t86_nvmem_cfg;
        info->rtc->nvram_old_abi = true;
 
        err = rtc_register_device(info->rtc);
        if (err)
                return err;
 
+       rtc_nvmem_register(info->rtc, &m48t86_nvmem_cfg);
+
        /* read battery status */
        reg = m48t86_readb(&pdev->dev, M48T86_D);
        dev_info(&pdev->dev, "battery %s\n",
index cbdc86a560bafd6b35eeb94aef65e5aa2f00f01e..ab60f13fa3efdc4105f59c248af39985d26b49fb 100644 (file)
@@ -139,8 +139,9 @@ static int max6900_i2c_write_regs(struct i2c_client *client, u8 const *buf)
        return -EIO;
 }
 
-static int max6900_i2c_read_time(struct i2c_client *client, struct rtc_time *tm)
+static int max6900_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+       struct i2c_client *client = to_i2c_client(dev);
        int rc;
        u8 regs[MAX6900_REG_LEN];
 
@@ -157,7 +158,7 @@ static int max6900_i2c_read_time(struct i2c_client *client, struct rtc_time *tm)
                      bcd2bin(regs[MAX6900_REG_CENTURY]) * 100 - 1900;
        tm->tm_wday = bcd2bin(regs[MAX6900_REG_DW]);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int max6900_i2c_clear_write_protect(struct i2c_client *client)
@@ -165,9 +166,9 @@ static int max6900_i2c_clear_write_protect(struct i2c_client *client)
        return i2c_smbus_write_byte_data(client, MAX6900_REG_CONTROL_WRITE, 0);
 }
 
-static int
-max6900_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
+static int max6900_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+       struct i2c_client *client = to_i2c_client(dev);
        u8 regs[MAX6900_REG_LEN];
        int rc;
 
@@ -193,16 +194,6 @@ max6900_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
        return 0;
 }
 
-static int max6900_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-       return max6900_i2c_read_time(to_i2c_client(dev), tm);
-}
-
-static int max6900_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-       return max6900_i2c_set_time(to_i2c_client(dev), tm);
-}
-
 static const struct rtc_class_ops max6900_rtc_ops = {
        .read_time = max6900_rtc_read_time,
        .set_time = max6900_rtc_set_time,
index 315d09e0f2c1b930bfcf7c1ecc5c5f0181be322c..745827463367a7d845500cd61d7fae09b7502890 100644 (file)
@@ -85,7 +85,7 @@ static int max6902_read_time(struct device *dev, struct rtc_time *dt)
        dt->tm_year += century;
        dt->tm_year -= 1900;
 
-       return rtc_valid_tm(dt);
+       return 0;
 }
 
 static int max6902_set_time(struct device *dev, struct rtc_time *dt)
index 623ab27b2757cda88eb75486a88e87369f0034bc..7e908a490cf691e5deb6b8ac6182eb65d39661f5 100644 (file)
@@ -75,7 +75,7 @@ static int max6916_read_time(struct device *dev, struct rtc_time *dt)
        dt->tm_wday = bcd2bin(buf[5]) - 1;
        dt->tm_year = bcd2bin(buf[6]) + 100;
 
-       return rtc_valid_tm(dt);
+       return 0;
 }
 
 static int max6916_set_time(struct device *dev, struct rtc_time *dt)
index 182fdd00e290d3f2ce79b32a99ee99f2ff382f81..cefde273fae6e36494f3e79a7625424ea52422aa 100644 (file)
@@ -364,11 +364,9 @@ static int max77686_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
        max77686_rtc_data_to_tm(data, tm, info);
 
-       ret = rtc_valid_tm(tm);
-
 out:
        mutex_unlock(&info->lock);
-       return ret;
+       return 0;
 }
 
 static int max77686_rtc_set_time(struct device *dev, struct rtc_time *tm)
index db984d4bf9526bbc78e501ff8da6a7036801872b..e8cee123e8aae3b1c08774d6d01796dd34ef71be 100644 (file)
@@ -153,7 +153,7 @@ static int max8997_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
        max8997_rtc_data_to_tm(data, tm, info->rtc_24hr_mode);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int max8997_rtc_set_time(struct device *dev, struct rtc_time *tm)
index 30804b00985e5f84770189a68eacad9854bea335..d8c0f9b3f87d5e330c943a575eaca6b16b7345f7 100644 (file)
@@ -120,7 +120,7 @@ static int max8998_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
        max8998_data_to_tm(data, tm);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int max8998_rtc_set_time(struct device *dev, struct rtc_time *tm)
index 30b8ef6a3676b73973a3fcbe427f1e752f29124c..1f892b238ddbc05094c1ec8da784e22a37a1d4fa 100644 (file)
@@ -85,7 +85,7 @@ static int mc13xxx_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
        rtc_time64_to_tm((time64_t)days1 * SEC_PER_DAY + seconds, tm);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int mc13xxx_rtc_set_mmss(struct device *dev, time64_t secs)
index 77f21331ae21c5f098cb7d3f307c294f1a62e889..00e11c1b2186c7e61eb585ec7f97b209e231d0aa 100644 (file)
@@ -82,7 +82,7 @@ static int mcp795_rtcc_write(struct device *dev, u8 addr, u8 *data, u8 count)
 {
        struct spi_device *spi = to_spi_device(dev);
        int ret;
-       u8 tx[2 + count];
+       u8 tx[257];
 
        tx[0] = MCP795_WRITE;
        tx[1] = addr;
@@ -262,7 +262,7 @@ static int mcp795_read_time(struct device *dev, struct rtc_time *tim)
                        tim->tm_year + 1900, tim->tm_mon, tim->tm_mday,
                        tim->tm_wday, tim->tm_hour, tim->tm_min, tim->tm_sec);
 
-       return rtc_valid_tm(tim);
+       return 0;
 }
 
 static int mcp795_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
index 4ca4daa0b8f32ae3761f759245332d1baccaf8c0..dd0364293bc068cc5dae1ddf064d499d6b98e2f8 100644 (file)
@@ -122,7 +122,7 @@ static int mpc5121_rtc_read_time(struct device *dev, struct rtc_time *tm)
         */
        mpc5121_rtc_update_smh(regs, tm);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int mpc5121_rtc_set_time(struct device *dev, struct rtc_time *tm)
index 7334c44fa7c3553d61db84570e2a78733d2e24e6..fcb9de5218b20c3842e00781219ce09f95b1f853 100644 (file)
@@ -105,7 +105,7 @@ static int mrst_read_time(struct device *dev, struct rtc_time *time)
        /* Adjust for the 1972/1900 */
        time->tm_year += 72;
        time->tm_mon--;
-       return rtc_valid_tm(time);
+       return 0;
 }
 
 static int mrst_set_time(struct device *dev, struct rtc_time *time)
@@ -122,7 +122,7 @@ static int mrst_set_time(struct device *dev, struct rtc_time *time)
        min = time->tm_min;
        sec = time->tm_sec;
 
-       if (yrs < 72 || yrs > 138)
+       if (yrs < 72 || yrs > 172)
                return -EINVAL;
        yrs -= 72;
 
index c1c5c4e3b3b4715ea250a5b16bfaa49e2ebd688b..0c72a2e8ec67d27f5bfd296a7772210371c3d88f 100644 (file)
@@ -155,7 +155,7 @@ static int msm6242_read_time(struct device *dev, struct rtc_time *tm)
 
        msm6242_unlock(priv);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int msm6242_set_time(struct device *dev, struct rtc_time *tm)
index d79b9ae4d237fb4167c9f2e7c1b8b1c395d31202..fd0cea722286bab19d3259e8e037c2d89fe576e2 100644 (file)
@@ -232,7 +232,7 @@ static int mtk_rtc_gettime(struct device *dev, struct rtc_time *tm)
 
        mtk_rtc_get_alarm_or_time(hw, tm, MTK_TC);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int mtk_rtc_settime(struct device *dev, struct rtc_time *tm)
@@ -307,6 +307,7 @@ static const struct of_device_id mtk_rtc_match[] = {
        { .compatible = "mediatek,soc-rtc" },
        {},
 };
+MODULE_DEVICE_TABLE(of, mtk_rtc_match);
 
 static int mtk_rtc_probe(struct platform_device *pdev)
 {
index 79bb28617d458ec99f77518472f2f1505960d2b0..bc52dbb0c0e2c0f04f9239e3a998185239c513f8 100644 (file)
@@ -94,7 +94,7 @@ static int mv_rtc_read_time(struct device *dev, struct rtc_time *tm)
        /* hw counts from year 2000, but tm_year is relative to 1900 */
        tm->tm_year = bcd2bin(year) + 100;
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int mv_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
@@ -223,7 +223,6 @@ static int __init mv_rtc_probe(struct platform_device *pdev)
        struct resource *res;
        struct rtc_plat_data *pdata;
        u32 rtc_time;
-       u32 rtc_date;
        int ret = 0;
 
        pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
@@ -259,17 +258,6 @@ static int __init mv_rtc_probe(struct platform_device *pdev)
                }
        }
 
-       /*
-        * A date after January 19th, 2038 does not fit on 32 bits and
-        * will confuse the kernel and userspace. Reset to a sane date
-        * (January 1st, 2013) if we're after 2038.
-        */
-       rtc_date = readl(pdata->ioaddr + RTC_DATE_REG_OFFS);
-       if (bcd2bin((rtc_date >> RTC_YEAR_OFFS) & 0xff) >= 38) {
-               dev_info(&pdev->dev, "invalid RTC date, resetting to January 1st, 2013\n");
-               writel(0x130101, pdata->ioaddr + RTC_DATE_REG_OFFS);
-       }
-
        pdata->irq = platform_get_irq(pdev, 0);
 
        platform_set_drvdata(pdev, pdata);
index 784221dfc9c7ad1634fca0d8829e8f603ddf7bb5..9e14efb990b24d6873d3499ed582116b60092c5a 100644 (file)
@@ -273,7 +273,7 @@ static const struct rtc_class_ops mxc_rtc_ops = {
        .alarm_irq_enable = mxc_rtc_alarm_irq_enable,
 };
 
-static int mxc_rtc_wait_for_flag(void *__iomem ioaddr, int flag)
+static int mxc_rtc_wait_for_flag(void __iomem *ioaddr, int flag)
 {
        unsigned int timeout = REG_READ_TIMEOUT;
 
index 4ed81117cf5f346d663d11f0a799df4bd3801926..7da664a771817a164c752579c5e4f1647263f839 100644 (file)
@@ -102,8 +102,8 @@ static int *check_rtc_access_enable(struct nuc900_rtc *nuc900_rtc)
        return NULL;
 }
 
-static int nuc900_rtc_bcd2bin(unsigned int timereg,
-                               unsigned int calreg, struct rtc_time *tm)
+static void nuc900_rtc_bcd2bin(unsigned int timereg,
+                              unsigned int calreg, struct rtc_time *tm)
 {
        tm->tm_mday     = bcd2bin(calreg >> 0);
        tm->tm_mon      = bcd2bin(calreg >> 8);
@@ -112,8 +112,6 @@ static int nuc900_rtc_bcd2bin(unsigned int timereg,
        tm->tm_sec      = bcd2bin(timereg >> 0);
        tm->tm_min      = bcd2bin(timereg >> 8);
        tm->tm_hour     = bcd2bin(timereg >> 16);
-
-       return rtc_valid_tm(tm);
 }
 
 static void nuc900_rtc_bin2bcd(struct device *dev, struct rtc_time *settm,
@@ -156,7 +154,9 @@ static int nuc900_rtc_read_time(struct device *dev, struct rtc_time *tm)
        timeval = __raw_readl(rtc->rtc_reg + REG_RTC_TLR);
        clrval  = __raw_readl(rtc->rtc_reg + REG_RTC_CLR);
 
-       return nuc900_rtc_bcd2bin(timeval, clrval, tm);
+       nuc900_rtc_bcd2bin(timeval, clrval, tm);
+
+       return 0;
 }
 
 static int nuc900_rtc_set_time(struct device *dev, struct rtc_time *tm)
@@ -189,7 +189,9 @@ static int nuc900_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        timeval = __raw_readl(rtc->rtc_reg + REG_RTC_TAR);
        carval  = __raw_readl(rtc->rtc_reg + REG_RTC_CAR);
 
-       return nuc900_rtc_bcd2bin(timeval, carval, &alrm->time);
+       nuc900_rtc_bcd2bin(timeval, carval, &alrm->time);
+
+       return rtc_valid_tm(&alrm->time);
 }
 
 static int nuc900_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
index 09ef802d6e544146fa5ccc1dcf969a1fc378d72d..39086398833e0f8ea95d272f91e4f09fec7cf68f 100644 (file)
@@ -273,9 +273,6 @@ static int omap_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
 /* this hardware doesn't support "don't care" alarm fields */
 static int tm2bcd(struct rtc_time *tm)
 {
-       if (rtc_valid_tm(tm) != 0)
-               return -EINVAL;
-
        tm->tm_sec = bin2bcd(tm->tm_sec);
        tm->tm_min = bin2bcd(tm->tm_min);
        tm->tm_hour = bin2bcd(tm->tm_hour);
@@ -850,7 +847,6 @@ static int omap_rtc_probe(struct platform_device *pdev)
 
        rtc->rtc->ops = &omap_rtc_ops;
        omap_rtc_nvmem_config.priv = rtc;
-       rtc->rtc->nvmem_config = &omap_rtc_nvmem_config;
 
        /* handle periodic and alarm irqs */
        ret = devm_request_irq(&pdev->dev, rtc->irq_timer, rtc_irq, 0,
@@ -886,6 +882,8 @@ static int omap_rtc_probe(struct platform_device *pdev)
        if (ret)
                goto err;
 
+       rtc_nvmem_register(rtc->rtc, &omap_rtc_nvmem_config);
+
        return 0;
 
 err:
index c4433240d8a91180f8ad9bafc4c14456cd426213..c05f524ba9afa880e221b756c2ca44e6ecec573f 100644 (file)
@@ -95,7 +95,7 @@ static int pcap_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
        rtc_time_to_tm(secs, tm);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int pcap_rtc_set_mmss(struct device *dev, unsigned long secs)
index 8895f77726e8da5444afcd602dceff8f25a9b3fd..e5222c5d822389b25d746607a45ccfbcfe3493ce 100644 (file)
@@ -289,7 +289,7 @@ static int pcf2123_rtc_read_time(struct device *dev, struct rtc_time *tm)
                        tm->tm_sec, tm->tm_min, tm->tm_hour,
                        tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int pcf2123_rtc_set_time(struct device *dev, struct rtc_time *tm)
index f33447c5db85e395ac540f43c1bf7ec69f48efcf..e83be1852c2fb276b25f1ff112e2be240e5aae0c 100644 (file)
@@ -111,7 +111,7 @@ static int pcf2127_rtc_read_time(struct device *dev, struct rtc_time *tm)
                tm->tm_sec, tm->tm_min, tm->tm_hour,
                tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int pcf2127_rtc_set_time(struct device *dev, struct rtc_time *tm)
index 00c31c91b245fb080b5312cc2d7499268d3b1a4b..ef72b0c389d79ed01566550f2d67094e2c89a448 100644 (file)
@@ -135,7 +135,7 @@ static int pcf50633_rtc_read_time(struct device *dev, struct rtc_time *tm)
                tm->tm_mday, tm->tm_mon, tm->tm_year,
                tm->tm_hour, tm->tm_min, tm->tm_sec);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int pcf50633_rtc_set_time(struct device *dev, struct rtc_time *tm)
index a06dff994c8316c0062b39d3704055a1305584a8..49bcbb3d4a696160798a3bbaa36a2be8e8e81db2 100644 (file)
@@ -70,7 +70,7 @@ static int pcf85063_start_clock(struct i2c_client *client, u8 ctrl1)
        s32 ret;
 
        /* start the clock */
-       ctrl1 &= PCF85063_REG_CTRL1_STOP;
+       ctrl1 &= ~PCF85063_REG_CTRL1_STOP;
 
        ret = i2c_smbus_write_byte_data(client, PCF85063_REG_CTRL1, ctrl1);
        if (ret < 0) {
@@ -81,8 +81,9 @@ static int pcf85063_start_clock(struct i2c_client *client, u8 ctrl1)
        return 0;
 }
 
-static int pcf85063_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int pcf85063_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+       struct i2c_client *client = to_i2c_client(dev);
        int rc;
        u8 regs[7];
 
@@ -114,11 +115,12 @@ static int pcf85063_get_datetime(struct i2c_client *client, struct rtc_time *tm)
        tm->tm_year = bcd2bin(regs[6]);
        tm->tm_year += 100;
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
-static int pcf85063_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int pcf85063_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+       struct i2c_client *client = to_i2c_client(dev);
        int rc;
        u8 regs[7];
        u8 ctrl1;
@@ -172,16 +174,6 @@ static int pcf85063_set_datetime(struct i2c_client *client, struct rtc_time *tm)
        return 0;
 }
 
-static int pcf85063_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-       return pcf85063_get_datetime(to_i2c_client(dev), tm);
-}
-
-static int pcf85063_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-       return pcf85063_set_datetime(to_i2c_client(dev), tm);
-}
-
 static const struct rtc_class_ops pcf85063_rtc_ops = {
        .read_time      = pcf85063_rtc_read_time,
        .set_time       = pcf85063_rtc_set_time
index c312af0db72957af5ed4980663fce838ab7cec1c..453615f8ac9a012ea0022227f947f5778c477fdf 100644 (file)
@@ -192,7 +192,7 @@ static int pcf8523_rtc_read_time(struct device *dev, struct rtc_time *tm)
        tm->tm_mon = bcd2bin(regs[5] & 0x1f) - 1;
        tm->tm_year = bcd2bin(regs[6]) + 100;
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int pcf8523_rtc_set_time(struct device *dev, struct rtc_time *tm)
index ea04e9f0930b697440af845abd7f413aea65093d..c04a1edcd571630f49ccfd26b558e2aba06516f9 100644 (file)
 #define CTRL_RESETS    0x2f
 #define CTRL_RAM       0x40
 
+#define ALRM_SEC_A1E   BIT(0)
+#define ALRM_MIN_A1E   BIT(1)
+#define ALRM_HR_A1E    BIT(2)
+#define ALRM_DAY_A1E   BIT(3)
+#define ALRM_MON_A1E   BIT(4)
+#define ALRM_MIN_A2E   BIT(5)
+#define ALRM_HR_A2E    BIT(6)
+#define ALRM_DAY_A2E   BIT(7)
+
+#define INT_WDIE       BIT(0)
+#define INT_BSIE       BIT(1)
+#define INT_TSRIE      BIT(2)
+#define INT_A2IE       BIT(3)
+#define INT_A1IE       BIT(4)
+#define INT_OIE                BIT(5)
+#define INT_PIE                BIT(6)
+#define INT_ILP                BIT(7)
+
+#define FLAGS_TSR1F    BIT(0)
+#define FLAGS_TSR2F    BIT(1)
+#define FLAGS_TSR3F    BIT(2)
+#define FLAGS_BSF      BIT(3)
+#define FLAGS_WDF      BIT(4)
+#define FLAGS_A1F      BIT(5)
+#define FLAGS_A2F      BIT(6)
+#define FLAGS_PIF      BIT(7)
+
+#define PIN_IO_INTAPM  GENMASK(1, 0)
+#define PIN_IO_INTA_CLK        0
+#define PIN_IO_INTA_BAT        1
+#define PIN_IO_INTA_OUT        2
+#define PIN_IO_INTA_HIZ        3
+
+#define STOP_EN_STOP   BIT(0)
+
+#define RESET_CPR      0xa4
+
 #define NVRAM_SIZE     0x40
 
 static struct i2c_driver pcf85363_driver;
@@ -80,7 +117,6 @@ static struct i2c_driver pcf85363_driver;
 struct pcf85363 {
        struct device           *dev;
        struct rtc_device       *rtc;
-       struct nvmem_config     nvmem_cfg;
        struct regmap           *regmap;
 };
 
@@ -116,8 +152,12 @@ static int pcf85363_rtc_read_time(struct device *dev, struct rtc_time *tm)
 static int pcf85363_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
        struct pcf85363 *pcf85363 = dev_get_drvdata(dev);
-       unsigned char buf[DT_YEARS + 1];
-       int len = sizeof(buf);
+       unsigned char tmp[11];
+       unsigned char *buf = &tmp[2];
+       int ret;
+
+       tmp[0] = STOP_EN_STOP;
+       tmp[1] = RESET_CPR;
 
        buf[DT_100THS] = 0;
        buf[DT_SECS] = bin2bcd(tm->tm_sec);
@@ -128,8 +168,116 @@ static int pcf85363_rtc_set_time(struct device *dev, struct rtc_time *tm)
        buf[DT_MONTHS] = bin2bcd(tm->tm_mon + 1);
        buf[DT_YEARS] = bin2bcd(tm->tm_year % 100);
 
-       return regmap_bulk_write(pcf85363->regmap, DT_100THS,
-                                buf, len);
+       ret = regmap_bulk_write(pcf85363->regmap, CTRL_STOP_EN,
+                               tmp, sizeof(tmp));
+       if (ret)
+               return ret;
+
+       return regmap_write(pcf85363->regmap, CTRL_STOP_EN, 0);
+}
+
+static int pcf85363_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+       struct pcf85363 *pcf85363 = dev_get_drvdata(dev);
+       unsigned char buf[DT_MONTH_ALM1 - DT_SECOND_ALM1 + 1];
+       unsigned int val;
+       int ret;
+
+       ret = regmap_bulk_read(pcf85363->regmap, DT_SECOND_ALM1, buf,
+                              sizeof(buf));
+       if (ret)
+               return ret;
+
+       alrm->time.tm_sec = bcd2bin(buf[0]);
+       alrm->time.tm_min = bcd2bin(buf[1]);
+       alrm->time.tm_hour = bcd2bin(buf[2]);
+       alrm->time.tm_mday = bcd2bin(buf[3]);
+       alrm->time.tm_mon = bcd2bin(buf[4]) - 1;
+
+       ret = regmap_read(pcf85363->regmap, CTRL_INTA_EN, &val);
+       if (ret)
+               return ret;
+
+       alrm->enabled =  !!(val & INT_A1IE);
+
+       return 0;
+}
+
+static int _pcf85363_rtc_alarm_irq_enable(struct pcf85363 *pcf85363, unsigned
+                                         int enabled)
+{
+       unsigned int alarm_flags = ALRM_SEC_A1E | ALRM_MIN_A1E | ALRM_HR_A1E |
+                                  ALRM_DAY_A1E | ALRM_MON_A1E;
+       int ret;
+
+       ret = regmap_update_bits(pcf85363->regmap, DT_ALARM_EN, alarm_flags,
+                                enabled ? alarm_flags : 0);
+       if (ret)
+               return ret;
+
+       ret = regmap_update_bits(pcf85363->regmap, CTRL_INTA_EN,
+                                INT_A1IE, enabled ? INT_A1IE : 0);
+
+       if (ret || enabled)
+               return ret;
+
+       /* clear current flags */
+       return regmap_update_bits(pcf85363->regmap, CTRL_FLAGS, FLAGS_A1F, 0);
+}
+
+static int pcf85363_rtc_alarm_irq_enable(struct device *dev,
+                                        unsigned int enabled)
+{
+       struct pcf85363 *pcf85363 = dev_get_drvdata(dev);
+
+       return _pcf85363_rtc_alarm_irq_enable(pcf85363, enabled);
+}
+
+static int pcf85363_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+       struct pcf85363 *pcf85363 = dev_get_drvdata(dev);
+       unsigned char buf[DT_MONTH_ALM1 - DT_SECOND_ALM1 + 1];
+       int ret;
+
+       buf[0] = bin2bcd(alrm->time.tm_sec);
+       buf[1] = bin2bcd(alrm->time.tm_min);
+       buf[2] = bin2bcd(alrm->time.tm_hour);
+       buf[3] = bin2bcd(alrm->time.tm_mday);
+       buf[4] = bin2bcd(alrm->time.tm_mon + 1);
+
+       /*
+        * Disable the alarm interrupt before changing the value to avoid
+        * spurious interrupts
+        */
+       ret = _pcf85363_rtc_alarm_irq_enable(pcf85363, 0);
+       if (ret)
+               return ret;
+
+       ret = regmap_bulk_write(pcf85363->regmap, DT_SECOND_ALM1, buf,
+                               sizeof(buf));
+       if (ret)
+               return ret;
+
+       return _pcf85363_rtc_alarm_irq_enable(pcf85363, alrm->enabled);
+}
+
+static irqreturn_t pcf85363_rtc_handle_irq(int irq, void *dev_id)
+{
+       struct pcf85363 *pcf85363 = i2c_get_clientdata(dev_id);
+       unsigned int flags;
+       int err;
+
+       err = regmap_read(pcf85363->regmap, CTRL_FLAGS, &flags);
+       if (err)
+               return IRQ_NONE;
+
+       if (flags & FLAGS_A1F) {
+               rtc_update_irq(pcf85363->rtc, 1, RTC_IRQF | RTC_AF);
+               regmap_update_bits(pcf85363->regmap, CTRL_FLAGS, FLAGS_A1F, 0);
+               return IRQ_HANDLED;
+       }
+
+       return IRQ_NONE;
 }
 
 static const struct rtc_class_ops rtc_ops = {
@@ -137,6 +285,14 @@ static const struct rtc_class_ops rtc_ops = {
        .set_time       = pcf85363_rtc_set_time,
 };
 
+static const struct rtc_class_ops rtc_ops_alarm = {
+       .read_time      = pcf85363_rtc_read_time,
+       .set_time       = pcf85363_rtc_set_time,
+       .read_alarm     = pcf85363_rtc_read_alarm,
+       .set_alarm      = pcf85363_rtc_set_alarm,
+       .alarm_irq_enable = pcf85363_rtc_alarm_irq_enable,
+};
+
 static int pcf85363_nvram_read(void *priv, unsigned int offset, void *val,
                               size_t bytes)
 {
@@ -158,12 +314,22 @@ static int pcf85363_nvram_write(void *priv, unsigned int offset, void *val,
 static const struct regmap_config regmap_config = {
        .reg_bits = 8,
        .val_bits = 8,
+       .max_register = 0x7f,
 };
 
 static int pcf85363_probe(struct i2c_client *client,
                          const struct i2c_device_id *id)
 {
        struct pcf85363 *pcf85363;
+       struct nvmem_config nvmem_cfg = {
+               .name = "pcf85363-",
+               .word_size = 1,
+               .stride = 1,
+               .size = NVRAM_SIZE,
+               .reg_read = pcf85363_nvram_read,
+               .reg_write = pcf85363_nvram_write,
+       };
+       int ret;
 
        if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
                return -ENODEV;
@@ -186,17 +352,28 @@ static int pcf85363_probe(struct i2c_client *client,
        if (IS_ERR(pcf85363->rtc))
                return PTR_ERR(pcf85363->rtc);
 
-       pcf85363->nvmem_cfg.name = "pcf85363-";
-       pcf85363->nvmem_cfg.word_size = 1;
-       pcf85363->nvmem_cfg.stride = 1;
-       pcf85363->nvmem_cfg.size = NVRAM_SIZE;
-       pcf85363->nvmem_cfg.reg_read = pcf85363_nvram_read;
-       pcf85363->nvmem_cfg.reg_write = pcf85363_nvram_write;
-       pcf85363->nvmem_cfg.priv = pcf85363;
-       pcf85363->rtc->nvmem_config = &pcf85363->nvmem_cfg;
        pcf85363->rtc->ops = &rtc_ops;
 
-       return rtc_register_device(pcf85363->rtc);
+       if (client->irq > 0) {
+               regmap_write(pcf85363->regmap, CTRL_FLAGS, 0);
+               regmap_update_bits(pcf85363->regmap, CTRL_PIN_IO,
+                                  PIN_IO_INTA_OUT, PIN_IO_INTAPM);
+               ret = devm_request_threaded_irq(pcf85363->dev, client->irq,
+                                               NULL, pcf85363_rtc_handle_irq,
+                                               IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+                                               "pcf85363", client);
+               if (ret)
+                       dev_warn(&client->dev, "unable to request IRQ, alarms disabled\n");
+               else
+                       pcf85363->rtc->ops = &rtc_ops_alarm;
+       }
+
+       ret = rtc_register_device(pcf85363->rtc);
+
+       nvmem_cfg.priv = pcf85363;
+       rtc_nvmem_register(pcf85363->rtc, &nvmem_cfg);
+
+       return ret;
 }
 
 static const struct of_device_id dev_ids[] = {
index 5cfb6df5c43032e294ccc5a610637ba6e049b27f..3c08eab4f1a81838f49e83df13eddd1a0bafff7b 100644 (file)
@@ -175,7 +175,7 @@ static int pic32_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
                rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
 
        clk_disable(pdata->clk);
-       return rtc_valid_tm(rtc_tm);
+       return 0;
 }
 
 static int pic32_rtc_settime(struct device *dev, struct rtc_time *tm)
index fac835530671ff003657a5807f750a73586be87e..29358a04592581c537d4d30fed775ecb81222175 100644 (file)
@@ -74,16 +74,18 @@ struct pm8xxx_rtc {
 /*
  * Steps to write the RTC registers.
  * 1. Disable alarm if enabled.
- * 2. Write 0x00 to LSB.
- * 3. Write Byte[1], Byte[2], Byte[3] then Byte[0].
- * 4. Enable alarm if disabled in step 1.
+ * 2. Disable rtc if enabled.
+ * 3. Write 0x00 to LSB.
+ * 4. Write Byte[1], Byte[2], Byte[3] then Byte[0].
+ * 5. Enable rtc if disabled in step 2.
+ * 6. Enable alarm if disabled in step 1.
  */
 static int pm8xxx_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
        int rc, i;
        unsigned long secs, irq_flags;
-       u8 value[NUM_8_BIT_RTC_REGS], alarm_enabled = 0;
-       unsigned int ctrl_reg;
+       u8 value[NUM_8_BIT_RTC_REGS], alarm_enabled = 0, rtc_disabled = 0;
+       unsigned int ctrl_reg, rtc_ctrl_reg;
        struct pm8xxx_rtc *rtc_dd = dev_get_drvdata(dev);
        const struct pm8xxx_rtc_regs *regs = rtc_dd->regs;
 
@@ -92,23 +94,38 @@ static int pm8xxx_rtc_set_time(struct device *dev, struct rtc_time *tm)
 
        rtc_tm_to_time(tm, &secs);
 
+       dev_dbg(dev, "Seconds value to be written to RTC = %lu\n", secs);
+
        for (i = 0; i < NUM_8_BIT_RTC_REGS; i++) {
                value[i] = secs & 0xFF;
                secs >>= 8;
        }
 
-       dev_dbg(dev, "Seconds value to be written to RTC = %lu\n", secs);
-
        spin_lock_irqsave(&rtc_dd->ctrl_reg_lock, irq_flags);
 
-       rc = regmap_read(rtc_dd->regmap, regs->ctrl, &ctrl_reg);
+       rc = regmap_read(rtc_dd->regmap, regs->alarm_ctrl, &ctrl_reg);
        if (rc)
                goto rtc_rw_fail;
 
        if (ctrl_reg & regs->alarm_en) {
                alarm_enabled = 1;
                ctrl_reg &= ~regs->alarm_en;
-               rc = regmap_write(rtc_dd->regmap, regs->ctrl, ctrl_reg);
+               rc = regmap_write(rtc_dd->regmap, regs->alarm_ctrl, ctrl_reg);
+               if (rc) {
+                       dev_err(dev, "Write to RTC Alarm control register failed\n");
+                       goto rtc_rw_fail;
+               }
+       }
+
+       /* Disable RTC H/w before writing on RTC register */
+       rc = regmap_read(rtc_dd->regmap, regs->ctrl, &rtc_ctrl_reg);
+       if (rc)
+               goto rtc_rw_fail;
+
+       if (rtc_ctrl_reg & PM8xxx_RTC_ENABLE) {
+               rtc_disabled = 1;
+               rtc_ctrl_reg &= ~PM8xxx_RTC_ENABLE;
+               rc = regmap_write(rtc_dd->regmap, regs->ctrl, rtc_ctrl_reg);
                if (rc) {
                        dev_err(dev, "Write to RTC control register failed\n");
                        goto rtc_rw_fail;
@@ -137,11 +154,21 @@ static int pm8xxx_rtc_set_time(struct device *dev, struct rtc_time *tm)
                goto rtc_rw_fail;
        }
 
+       /* Enable RTC H/w after writing on RTC register */
+       if (rtc_disabled) {
+               rtc_ctrl_reg |= PM8xxx_RTC_ENABLE;
+               rc = regmap_write(rtc_dd->regmap, regs->ctrl, rtc_ctrl_reg);
+               if (rc) {
+                       dev_err(dev, "Write to RTC control register failed\n");
+                       goto rtc_rw_fail;
+               }
+       }
+
        if (alarm_enabled) {
                ctrl_reg |= regs->alarm_en;
-               rc = regmap_write(rtc_dd->regmap, regs->ctrl, ctrl_reg);
+               rc = regmap_write(rtc_dd->regmap, regs->alarm_ctrl, ctrl_reg);
                if (rc) {
-                       dev_err(dev, "Write to RTC control register failed\n");
+                       dev_err(dev, "Write to RTC Alarm control register failed\n");
                        goto rtc_rw_fail;
                }
        }
@@ -190,12 +217,6 @@ static int pm8xxx_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
        rtc_time_to_tm(secs, tm);
 
-       rc = rtc_valid_tm(tm);
-       if (rc < 0) {
-               dev_err(dev, "Invalid time read from RTC\n");
-               return rc;
-       }
-
        dev_dbg(dev, "secs = %lu, h:m:s == %d:%d:%d, d/m/y = %d/%d/%d\n",
                secs, tm->tm_hour, tm->tm_min, tm->tm_sec,
                tm->tm_mday, tm->tm_mon, tm->tm_year);
index 6a8f5d758eac658f883378f3cee8b4fee2124486..347288bff43896c7dcc11eee61d311e8cf13040a 100644 (file)
@@ -41,7 +41,7 @@ static u64 read_rtc(void)
 static int ps3_get_time(struct device *dev, struct rtc_time *tm)
 {
        rtc_time_to_tm(read_rtc() + ps3_os_area_get_rtc_diff(), tm);
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int ps3_set_time(struct device *dev, struct rtc_time *tm)
index 500e8c8a2605aa83c3e068e1ee2513d3704f4e30..169704b2ce1332923bd7a623846c7031bc597c28 100644 (file)
@@ -224,7 +224,7 @@ static int rtc7301_read_time(struct device *dev, struct rtc_time *tm)
 
        spin_unlock_irqrestore(&priv->lock, flags);
 
-       return err ? err : rtc_valid_tm(tm);
+       return err;
 }
 
 static int rtc7301_set_time(struct device *dev, struct rtc_time *tm)
index b6c5eb97051c4443913a3fde38380441a7f6e18f..a39ccd1cf6e8f5f50d2a73bf9028c390f5c21d26 100644 (file)
@@ -92,7 +92,7 @@ static int r9701_get_datetime(struct device *dev, struct rtc_time *dt)
         * according to the data sheet. make sure they are valid.
         */
 
-       return rtc_valid_tm(dt);
+       return 0;
 }
 
 static int r9701_set_datetime(struct device *dev, struct rtc_time *dt)
index 35c9aada07c8ef3a19f44cf8d4a3e8811f484e3f..739c0d42e835321a8273d1371df361012281e7cb 100644 (file)
@@ -375,7 +375,6 @@ static int rk808_rtc_probe(struct platform_device *pdev)
 {
        struct rk808 *rk808 = dev_get_drvdata(pdev->dev.parent);
        struct rk808_rtc *rk808_rtc;
-       struct rtc_time tm;
        int ret;
 
        rk808_rtc = devm_kzalloc(&pdev->dev, sizeof(*rk808_rtc), GFP_KERNEL);
@@ -404,24 +403,13 @@ static int rk808_rtc_probe(struct platform_device *pdev)
                        return ret;
        }
 
-       /* set init time */
-       ret = rk808_rtc_readtime(&pdev->dev, &tm);
-       if (ret) {
-               dev_err(&pdev->dev, "Failed to read RTC time\n");
-               return ret;
-       }
-       ret = rtc_valid_tm(&tm);
-       if (ret)
-               dev_warn(&pdev->dev, "invalid date/time\n");
-
        device_init_wakeup(&pdev->dev, 1);
 
-       rk808_rtc->rtc = devm_rtc_device_register(&pdev->dev, "rk808-rtc",
-                                                 &rk808_rtc_ops, THIS_MODULE);
-       if (IS_ERR(rk808_rtc->rtc)) {
-               ret = PTR_ERR(rk808_rtc->rtc);
-               return ret;
-       }
+       rk808_rtc->rtc = devm_rtc_allocate_device(&pdev->dev);
+       if (IS_ERR(rk808_rtc->rtc))
+               return PTR_ERR(rk808_rtc->rtc);
+
+       rk808_rtc->rtc->ops = &rk808_rtc_ops;
 
        rk808_rtc->irq = platform_get_irq(pdev, 0);
        if (rk808_rtc->irq < 0) {
@@ -438,9 +426,10 @@ static int rk808_rtc_probe(struct platform_device *pdev)
        if (ret) {
                dev_err(&pdev->dev, "Failed to request alarm IRQ %d: %d\n",
                        rk808_rtc->irq, ret);
+               return ret;
        }
 
-       return ret;
+       return rtc_register_device(rk808_rtc->rtc);
 }
 
 static struct platform_driver rk808_rtc_driver = {
index 026035373ae65a446122c6ebf39df9364b58718a..f1c160fe7d37e55a566a782498928a90431baed6 100644 (file)
@@ -64,7 +64,6 @@ struct rp5c01_priv {
        u32 __iomem *regs;
        struct rtc_device *rtc;
        spinlock_t lock;        /* against concurrent RTC/NVRAM access */
-       struct bin_attribute nvram_attr;
 };
 
 static inline unsigned int rp5c01_read(struct rp5c01_priv *priv,
@@ -116,7 +115,7 @@ static int rp5c01_read_time(struct device *dev, struct rtc_time *tm)
        rp5c01_unlock(priv);
        spin_unlock_irq(&priv->lock);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int rp5c01_set_time(struct device *dev, struct rtc_time *tm)
@@ -160,17 +159,15 @@ static const struct rtc_class_ops rp5c01_rtc_ops = {
  * byte is stored in BLOCK10, the low nibble in BLOCK11.
  */
 
-static ssize_t rp5c01_nvram_read(struct file *filp, struct kobject *kobj,
-                                struct bin_attribute *bin_attr,
-                                char *buf, loff_t pos, size_t size)
+static int rp5c01_nvram_read(void *_priv, unsigned int pos, void *val,
+                            size_t bytes)
 {
-       struct device *dev = container_of(kobj, struct device, kobj);
-       struct rp5c01_priv *priv = dev_get_drvdata(dev);
-       ssize_t count;
+       struct rp5c01_priv *priv = _priv;
+       u8 *buf = val;
 
        spin_lock_irq(&priv->lock);
 
-       for (count = 0; count < size; count++) {
+       for (; bytes; bytes--) {
                u8 data;
 
                rp5c01_write(priv,
@@ -187,20 +184,18 @@ static ssize_t rp5c01_nvram_read(struct file *filp, struct kobject *kobj,
        }
 
        spin_unlock_irq(&priv->lock);
-       return count;
+       return 0;
 }
 
-static ssize_t rp5c01_nvram_write(struct file *filp, struct kobject *kobj,
-                                 struct bin_attribute *bin_attr,
-                                 char *buf, loff_t pos, size_t size)
+static int rp5c01_nvram_write(void *_priv, unsigned int pos, void *val,
+                             size_t bytes)
 {
-       struct device *dev = container_of(kobj, struct device, kobj);
-       struct rp5c01_priv *priv = dev_get_drvdata(dev);
-       ssize_t count;
+       struct rp5c01_priv *priv = _priv;
+       u8 *buf = val;
 
        spin_lock_irq(&priv->lock);
 
-       for (count = 0; count < size; count++) {
+       for (; bytes; bytes--) {
                u8 data = *buf++;
 
                rp5c01_write(priv,
@@ -216,7 +211,7 @@ static ssize_t rp5c01_nvram_write(struct file *filp, struct kobject *kobj,
        }
 
        spin_unlock_irq(&priv->lock);
-       return count;
+       return 0;
 }
 
 static int __init rp5c01_rtc_probe(struct platform_device *dev)
@@ -225,6 +220,14 @@ static int __init rp5c01_rtc_probe(struct platform_device *dev)
        struct rp5c01_priv *priv;
        struct rtc_device *rtc;
        int error;
+       struct nvmem_config nvmem_cfg = {
+               .name = "rp5c01_nvram",
+               .word_size = 1,
+               .stride = 1,
+               .size = RP5C01_MODE,
+               .reg_read = rp5c01_nvram_read,
+               .reg_write = rp5c01_nvram_write,
+       };
 
        res = platform_get_resource(dev, IORESOURCE_MEM, 0);
        if (!res)
@@ -238,43 +241,31 @@ static int __init rp5c01_rtc_probe(struct platform_device *dev)
        if (!priv->regs)
                return -ENOMEM;
 
-       sysfs_bin_attr_init(&priv->nvram_attr);
-       priv->nvram_attr.attr.name = "nvram";
-       priv->nvram_attr.attr.mode = S_IRUGO | S_IWUSR;
-       priv->nvram_attr.read = rp5c01_nvram_read;
-       priv->nvram_attr.write = rp5c01_nvram_write;
-       priv->nvram_attr.size = RP5C01_MODE;
-
        spin_lock_init(&priv->lock);
 
        platform_set_drvdata(dev, priv);
 
-       rtc = devm_rtc_device_register(&dev->dev, "rtc-rp5c01", &rp5c01_rtc_ops,
-                                 THIS_MODULE);
+       rtc = devm_rtc_allocate_device(&dev->dev);
        if (IS_ERR(rtc))
                return PTR_ERR(rtc);
+
+       rtc->ops = &rp5c01_rtc_ops;
+       rtc->nvram_old_abi = true;
+
        priv->rtc = rtc;
 
-       error = sysfs_create_bin_file(&dev->dev.kobj, &priv->nvram_attr);
+       nvmem_cfg.priv = priv;
+       error = rtc_nvmem_register(rtc, &nvmem_cfg);
        if (error)
                return error;
 
-       return 0;
-}
-
-static int __exit rp5c01_rtc_remove(struct platform_device *dev)
-{
-       struct rp5c01_priv *priv = platform_get_drvdata(dev);
-
-       sysfs_remove_bin_file(&dev->dev.kobj, &priv->nvram_attr);
-       return 0;
+       return rtc_register_device(rtc);
 }
 
 static struct platform_driver rp5c01_rtc_driver = {
        .driver = {
                .name   = "rtc-rp5c01",
        },
-       .remove = __exit_p(rp5c01_rtc_remove),
 };
 
 module_platform_driver_probe(rp5c01_rtc_driver, rp5c01_rtc_probe);
index 9a306983aabaecefe0633aaaf7fb563606691fdd..f2de8b17e7e36541dcf52653d37188eb4cec9de2 100644 (file)
@@ -135,11 +135,6 @@ rs5c348_rtc_read_time(struct device *dev, struct rtc_time *tm)
        tm->tm_year = bcd2bin(rxbuf[RS5C348_REG_YEAR]) +
                ((rxbuf[RS5C348_REG_MONTH] & RS5C348_BIT_Y2K) ? 100 : 0);
 
-       if (rtc_valid_tm(tm) < 0) {
-               dev_err(&spi->dev, "retrieved date/time is not valid.\n");
-               rtc_time_to_tm(0, tm);
-       }
-
        return 0;
 }
 
index d4eff8d7131fd171769cc4238e8f550758e30b53..c5038329058c098487c1f77f7272b6ccfaaaab85 100644 (file)
@@ -207,8 +207,9 @@ static unsigned rs5c_hr2reg(struct rs5c372 *rs5c, unsigned hour)
        return bin2bcd(hour);
 }
 
-static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int rs5c372_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+       struct i2c_client *client = to_i2c_client(dev);
        struct rs5c372  *rs5c = i2c_get_clientdata(client);
        int             status = rs5c_get_regs(rs5c);
 
@@ -234,12 +235,12 @@ static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
                tm->tm_sec, tm->tm_min, tm->tm_hour,
                tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-       /* rtc might need initialization */
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
-static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int rs5c372_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+       struct i2c_client *client = to_i2c_client(dev);
        struct rs5c372  *rs5c = i2c_get_clientdata(client);
        unsigned char   buf[7];
        int             addr;
@@ -305,17 +306,6 @@ static int rs5c372_get_trim(struct i2c_client *client, int *osc, int *trim)
 }
 #endif
 
-static int rs5c372_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-       return rs5c372_get_datetime(to_i2c_client(dev), tm);
-}
-
-static int rs5c372_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-       return rs5c372_set_datetime(to_i2c_client(dev), tm);
-}
-
-
 static int rs5c_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
        struct i2c_client       *client = to_i2c_client(dev);
@@ -581,7 +571,6 @@ static int rs5c372_probe(struct i2c_client *client,
        int err = 0;
        int smbus_mode = 0;
        struct rs5c372 *rs5c372;
-       struct rtc_time tm;
 
        dev_dbg(&client->dev, "%s\n", __func__);
 
@@ -662,9 +651,6 @@ static int rs5c372_probe(struct i2c_client *client,
                goto exit;
        }
 
-       if (rs5c372_get_datetime(client, &tm) < 0)
-               dev_warn(&client->dev, "clock needs to be set\n");
-
        dev_info(&client->dev, "%s found, %s\n",
                        ({ char *s; switch (rs5c372->type) {
                        case rtc_r2025sd:       s = "r2025sd"; break;
index aae2576741a61585a9624217fa2a6cd1d26510e1..29fc3d210392387ec27814500fcc215a3c838682 100644 (file)
@@ -68,7 +68,6 @@ struct rv8803_data {
        struct mutex flags_lock;
        u8 ctrl;
        enum rv8803_type type;
-       struct nvmem_config nvmem_cfg;
 };
 
 static int rv8803_read_reg(const struct i2c_client *client, u8 reg)
@@ -528,6 +527,15 @@ static int rv8803_probe(struct i2c_client *client,
        struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
        struct rv8803_data *rv8803;
        int err, flags;
+       struct nvmem_config nvmem_cfg = {
+               .name = "rv8803_nvram",
+               .word_size = 1,
+               .stride = 1,
+               .size = 1,
+               .reg_read = rv8803_nvram_read,
+               .reg_write = rv8803_nvram_write,
+               .priv = client,
+       };
 
        if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA |
                                     I2C_FUNC_SMBUS_I2C_BLOCK)) {
@@ -582,21 +590,6 @@ static int rv8803_probe(struct i2c_client *client,
                }
        }
 
-       rv8803->nvmem_cfg.name = "rv8803_nvram",
-       rv8803->nvmem_cfg.word_size = 1,
-       rv8803->nvmem_cfg.stride = 1,
-       rv8803->nvmem_cfg.size = 1,
-       rv8803->nvmem_cfg.reg_read = rv8803_nvram_read,
-       rv8803->nvmem_cfg.reg_write = rv8803_nvram_write,
-       rv8803->nvmem_cfg.priv = client;
-
-       rv8803->rtc->ops = &rv8803_rtc_ops;
-       rv8803->rtc->nvmem_config = &rv8803->nvmem_cfg;
-       rv8803->rtc->nvram_old_abi = true;
-       err = rtc_register_device(rv8803->rtc);
-       if (err)
-               return err;
-
        err = rv8803_write_reg(rv8803->client, RV8803_EXT, RV8803_EXT_WADA);
        if (err)
                return err;
@@ -607,6 +600,14 @@ static int rv8803_probe(struct i2c_client *client,
                return err;
        }
 
+       rv8803->rtc->ops = &rv8803_rtc_ops;
+       rv8803->rtc->nvram_old_abi = true;
+       err = rtc_register_device(rv8803->rtc);
+       if (err)
+               return err;
+
+       rtc_nvmem_register(rv8803->rtc, &nvmem_cfg);
+
        rv8803->rtc->max_user_freq = 1;
 
        return 0;
index de3fe4f8d133d6924dccc6f39b81483b71ff0847..c59a218bdd872687c0271d2491564db7a033b070 100644 (file)
@@ -172,11 +172,7 @@ static int rx4581_get_datetime(struct device *dev, struct rtc_time *tm)
                tm->tm_sec, tm->tm_min, tm->tm_hour,
                tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-       err = rtc_valid_tm(tm);
-       if (err < 0)
-               dev_err(dev, "retrieved date/time is not valid.\n");
-
-       return err;
+       return 0;
 }
 
 static int rx4581_set_datetime(struct device *dev, struct rtc_time *tm)
index 7c9c08eab5e5b6baf52d0bafcf86d3d92698c693..8e322d884cc27fe6c711305ef6b6bc6571a7d820 100644 (file)
@@ -252,7 +252,7 @@ static int rx6110_get_time(struct device *dev, struct rtc_time *tm)
                tm->tm_sec, tm->tm_min, tm->tm_hour,
                tm->tm_mday, tm->tm_mon, tm->tm_year);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static const struct reg_sequence rx6110_default_regs[] = {
index 5c5938ab3d86bcb0bef62e9425553cce800b9d2b..7ddc22eb5b0fad3216dff4981541e1c4c419b24b 100644 (file)
@@ -138,7 +138,7 @@ static int rx8010_get_time(struct device *dev, struct rtc_time *dt)
        dt->tm_year = bcd2bin(date[RX8010_YEAR - RX8010_SEC]) + 100;
        dt->tm_wday = ffs(date[RX8010_WDAY - RX8010_SEC] & 0x7f);
 
-       return rtc_valid_tm(dt);
+       return 0;
 }
 
 static int rx8010_set_time(struct device *dev, struct rtc_time *dt)
index 91857d8d2df8707a75a0f931d5ba6afb6ed45afe..41127adf57655083f3e85eb773517725dab00bf0 100644 (file)
@@ -214,7 +214,7 @@ static int rx8025_get_time(struct device *dev, struct rtc_time *dt)
                dt->tm_sec, dt->tm_min, dt->tm_hour,
                dt->tm_mday, dt->tm_mon, dt->tm_year);
 
-       return rtc_valid_tm(dt);
+       return 0;
 }
 
 static int rx8025_set_time(struct device *dev, struct rtc_time *dt)
index 9998d7937688c6e27d351c846cdfe16b978b48ef..32caadf912ca2db422af86cd15847c28c2c71e04 100644 (file)
@@ -164,11 +164,7 @@ static int rx8581_get_datetime(struct i2c_client *client, struct rtc_time *tm)
                tm->tm_sec, tm->tm_min, tm->tm_hour,
                tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-       err = rtc_valid_tm(tm);
-       if (err < 0)
-               dev_err(&client->dev, "retrieved date/time is not valid.\n");
-
-       return err;
+       return 0;
 }
 
 static int rx8581_set_datetime(struct i2c_client *client, struct rtc_time *tm)
index 7067bca5c20d9d2c57777f40714db95b0c9a2fa0..77feb603cd4c0b363cf57abad36fb03b30f61cb7 100644 (file)
@@ -210,8 +210,9 @@ static int s35390a_reg2hr(struct s35390a *s35390a, char reg)
        return hour;
 }
 
-static int s35390a_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int s35390a_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+       struct i2c_client *client = to_i2c_client(dev);
        struct s35390a  *s35390a = i2c_get_clientdata(client);
        int i, err;
        char buf[7], status;
@@ -241,8 +242,9 @@ static int s35390a_set_datetime(struct i2c_client *client, struct rtc_time *tm)
        return err;
 }
 
-static int s35390a_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int s35390a_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+       struct i2c_client *client = to_i2c_client(dev);
        struct s35390a *s35390a = i2c_get_clientdata(client);
        char buf[7], status;
        int i, err;
@@ -271,11 +273,12 @@ static int s35390a_get_datetime(struct i2c_client *client, struct rtc_time *tm)
                tm->tm_min, tm->tm_hour, tm->tm_mday, tm->tm_mon, tm->tm_year,
                tm->tm_wday);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
-static int s35390a_set_alarm(struct i2c_client *client, struct rtc_wkalrm *alm)
+static int s35390a_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
 {
+       struct i2c_client *client = to_i2c_client(dev);
        struct s35390a *s35390a = i2c_get_clientdata(client);
        char buf[3], sts = 0;
        int err, i;
@@ -329,8 +332,9 @@ static int s35390a_set_alarm(struct i2c_client *client, struct rtc_wkalrm *alm)
        return err;
 }
 
-static int s35390a_read_alarm(struct i2c_client *client, struct rtc_wkalrm *alm)
+static int s35390a_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
 {
+       struct i2c_client *client = to_i2c_client(dev);
        struct s35390a *s35390a = i2c_get_clientdata(client);
        char buf[3], sts;
        int i, err;
@@ -384,26 +388,6 @@ static int s35390a_read_alarm(struct i2c_client *client, struct rtc_wkalrm *alm)
        return 0;
 }
 
-static int s35390a_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
-{
-       return s35390a_read_alarm(to_i2c_client(dev), alm);
-}
-
-static int s35390a_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
-{
-       return s35390a_set_alarm(to_i2c_client(dev), alm);
-}
-
-static int s35390a_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-       return s35390a_get_datetime(to_i2c_client(dev), tm);
-}
-
-static int s35390a_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-       return s35390a_set_datetime(to_i2c_client(dev), tm);
-}
-
 static int s35390a_rtc_ioctl(struct device *dev, unsigned int cmd,
                             unsigned long arg)
 {
@@ -450,7 +434,6 @@ static int s35390a_probe(struct i2c_client *client,
        int err, err_read;
        unsigned int i;
        struct s35390a *s35390a;
-       struct rtc_time tm;
        char buf, status1;
 
        if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
@@ -508,9 +491,6 @@ static int s35390a_probe(struct i2c_client *client,
                }
        }
 
-       if (err_read > 0 || s35390a_get_datetime(client, &tm) < 0)
-               dev_warn(&client->dev, "clock needs to be set\n");
-
        device_set_wakeup_capable(&client->dev, 1);
 
        s35390a->rtc = devm_rtc_device_register(&client->dev,
index a8992c227f611807c998bbe81c434d9388cfd33f..75c8c5033e0877bc313527491df9a2e9edf21d3c 100644 (file)
@@ -232,7 +232,7 @@ retry_get_time:
 
        rtc_tm->tm_mon -= 1;
 
-       return rtc_valid_tm(rtc_tm);
+       return 0;
 }
 
 static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
index 0477678d968fd1ccdec8e36784e57e05910364dc..8428455432ca77c89790256cab801ab055c3fdd0 100644 (file)
  */
 #define UDR_READ_RETRY_CNT     5
 
+enum {
+       RTC_SEC = 0,
+       RTC_MIN,
+       RTC_HOUR,
+       RTC_WEEKDAY,
+       RTC_DATE,
+       RTC_MONTH,
+       RTC_YEAR1,
+       RTC_YEAR2,
+       /* Make sure this is always the last enum name. */
+       RTC_MAX_NUM_TIME_REGS
+};
+
 /*
  * Registers used by the driver which are different between chipsets.
  *
@@ -367,7 +380,7 @@ static void s5m8763_tm_to_data(struct rtc_time *tm, u8 *data)
 static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
        struct s5m_rtc_info *info = dev_get_drvdata(dev);
-       u8 data[info->regs->regs_count];
+       u8 data[RTC_MAX_NUM_TIME_REGS];
        int ret;
 
        if (info->regs->read_time_udr_mask) {
@@ -407,13 +420,13 @@ static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm)
                1900 + tm->tm_year, 1 + tm->tm_mon, tm->tm_mday,
                tm->tm_hour, tm->tm_min, tm->tm_sec, tm->tm_wday);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
        struct s5m_rtc_info *info = dev_get_drvdata(dev);
-       u8 data[info->regs->regs_count];
+       u8 data[RTC_MAX_NUM_TIME_REGS];
        int ret = 0;
 
        switch (info->device_type) {
@@ -450,7 +463,7 @@ static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm)
 static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
        struct s5m_rtc_info *info = dev_get_drvdata(dev);
-       u8 data[info->regs->regs_count];
+       u8 data[RTC_MAX_NUM_TIME_REGS];
        unsigned int val;
        int ret, i;
 
@@ -500,7 +513,7 @@ static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 
 static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
 {
-       u8 data[info->regs->regs_count];
+       u8 data[RTC_MAX_NUM_TIME_REGS];
        int ret, i;
        struct rtc_time tm;
 
@@ -545,7 +558,7 @@ static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
 static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
 {
        int ret;
-       u8 data[info->regs->regs_count];
+       u8 data[RTC_MAX_NUM_TIME_REGS];
        u8 alarm0_conf;
        struct rtc_time tm;
 
@@ -598,7 +611,7 @@ static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
 static int s5m_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
        struct s5m_rtc_info *info = dev_get_drvdata(dev);
-       u8 data[info->regs->regs_count];
+       u8 data[RTC_MAX_NUM_TIME_REGS];
        int ret;
 
        switch (info->device_type) {
index d544d5268757b5b1c429cabb8e2b37ad5c59e7e1..00d87d138984b230623bd16cf31184e33d178ee9 100644 (file)
@@ -376,7 +376,7 @@ static int sprd_rtc_read_time(struct device *dev, struct rtc_time *tm)
                return ret;
 
        rtc_time64_to_tm(secs, tm);
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int sprd_rtc_set_time(struct device *dev, struct rtc_time *tm)
index 6c2d3989f967baff96625ab39d0d19336b002def..4e8ab370ce63bb4ca097e35c21f7f99513073770 100644 (file)
@@ -414,7 +414,7 @@ static int sh_rtc_read_time(struct device *dev, struct rtc_time *tm)
                tm->tm_sec, tm->tm_min, tm->tm_hour,
                tm->tm_mday, tm->tm_mon + 1, tm->tm_year, tm->tm_wday);
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int sh_rtc_set_time(struct device *dev, struct rtc_time *tm)
index 7367f617145cdeb4694d319ae0a1fb543a94f6b8..2a9e151cae992ae321b9ec384386fd9ff8b58ace 100644 (file)
@@ -204,23 +204,6 @@ static int sirfsoc_rtc_set_time(struct device *dev,
        return 0;
 }
 
-static int sirfsoc_rtc_ioctl(struct device *dev, unsigned int cmd,
-               unsigned long arg)
-{
-       switch (cmd) {
-       case RTC_PIE_ON:
-       case RTC_PIE_OFF:
-       case RTC_UIE_ON:
-       case RTC_UIE_OFF:
-       case RTC_AIE_ON:
-       case RTC_AIE_OFF:
-               return 0;
-
-       default:
-               return -ENOIOCTLCMD;
-       }
-}
-
 static int sirfsoc_rtc_alarm_irq_enable(struct device *dev,
                unsigned int enabled)
 {
@@ -250,7 +233,6 @@ static const struct rtc_class_ops sirfsoc_rtc_ops = {
        .set_time = sirfsoc_rtc_set_time,
        .read_alarm = sirfsoc_rtc_read_alarm,
        .set_alarm = sirfsoc_rtc_set_alarm,
-       .ioctl = sirfsoc_rtc_ioctl,
        .alarm_irq_enable = sirfsoc_rtc_alarm_irq_enable
 };
 
index d8ef9e052c4fc71f38a871a8293271cb469d495b..9af591d5223c3af8c6293a5f20704dc1da20d76e 100644 (file)
@@ -132,20 +132,23 @@ static int snvs_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
        struct snvs_rtc_data *data = dev_get_drvdata(dev);
        unsigned long time;
+       int ret;
 
        rtc_tm_to_time(tm, &time);
 
        /* Disable RTC first */
-       snvs_rtc_enable(data, false);
+       ret = snvs_rtc_enable(data, false);
+       if (ret)
+               return ret;
 
        /* Write 32-bit time to 47-bit timer, leaving 15 LSBs blank */
        regmap_write(data->regmap, data->offset + SNVS_LPSRTCLR, time << CNTR_TO_SECS_SH);
        regmap_write(data->regmap, data->offset + SNVS_LPSRTCMR, time >> (32 - CNTR_TO_SECS_SH));
 
        /* Enable RTC again */
-       snvs_rtc_enable(data, true);
+       ret = snvs_rtc_enable(data, true);
 
-       return 0;
+       return ret;
 }
 
 static int snvs_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
@@ -288,7 +291,11 @@ static int snvs_rtc_probe(struct platform_device *pdev)
        regmap_write(data->regmap, data->offset + SNVS_LPSR, 0xffffffff);
 
        /* Enable RTC */
-       snvs_rtc_enable(data, true);
+       ret = snvs_rtc_enable(data, true);
+       if (ret) {
+               dev_err(&pdev->dev, "failed to enable rtc %d\n", ret);
+               goto error_rtc_device_register;
+       }
 
        device_init_wakeup(&pdev->dev, true);
 
index e377f42abae7ae952e2651aaa4805057d1d230c0..0567944fd4f89ada811158b0315e2eb473ce4f48 100644 (file)
@@ -170,18 +170,14 @@ static irqreturn_t spear_rtc_irq(int irq, void *dev_id)
 
 }
 
-static int tm2bcd(struct rtc_time *tm)
+static void tm2bcd(struct rtc_time *tm)
 {
-       if (rtc_valid_tm(tm) != 0)
-               return -EINVAL;
        tm->tm_sec = bin2bcd(tm->tm_sec);
        tm->tm_min = bin2bcd(tm->tm_min);
        tm->tm_hour = bin2bcd(tm->tm_hour);
        tm->tm_mday = bin2bcd(tm->tm_mday);
        tm->tm_mon = bin2bcd(tm->tm_mon + 1);
        tm->tm_year = bin2bcd(tm->tm_year);
-
-       return 0;
 }
 
 static void bcd2tm(struct rtc_time *tm)
@@ -237,8 +233,7 @@ static int spear_rtc_set_time(struct device *dev, struct rtc_time *tm)
        struct spear_rtc_config *config = dev_get_drvdata(dev);
        unsigned int time, date;
 
-       if (tm2bcd(tm) < 0)
-               return -EINVAL;
+       tm2bcd(tm);
 
        rtc_wait_not_busy(config);
        time = (tm->tm_sec << SECOND_SHIFT) | (tm->tm_min << MINUTE_SHIFT) |
@@ -295,8 +290,7 @@ static int spear_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
        unsigned int time, date;
        int err;
 
-       if (tm2bcd(&alm->time) < 0)
-               return -EINVAL;
+       tm2bcd(&alm->time);
 
        rtc_wait_not_busy(config);
 
index 82b0af159a28221cff367964b27206e8b402385d..d5222667f892ba892f2dec0074fe4be96e3a518d 100644 (file)
@@ -195,7 +195,6 @@ static int st_rtc_probe(struct platform_device *pdev)
        struct device_node *np = pdev->dev.of_node;
        struct st_rtc *rtc;
        struct resource *res;
-       struct rtc_time tm_check;
        uint32_t mode;
        int ret = 0;
 
@@ -254,21 +253,6 @@ static int st_rtc_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, rtc);
 
-       /*
-        * The RTC-LPC is able to manage date.year > 2038
-        * but currently the kernel can not manage this date!
-        * If the RTC-LPC has a date.year > 2038 then
-        * it's set to the epoch "Jan 1st 2000"
-        */
-       st_rtc_read_time(&pdev->dev, &tm_check);
-
-       if (tm_check.tm_year >=  (2038 - 1900)) {
-               memset(&tm_check, 0, sizeof(tm_check));
-               tm_check.tm_year = 100;
-               tm_check.tm_mday = 1;
-               st_rtc_set_time(&pdev->dev, &tm_check);
-       }
-
        rtc->rtc_dev = rtc_device_register("st-lpc-rtc", &pdev->dev,
                                           &st_rtc_ops, THIS_MODULE);
        if (IS_ERR(rtc->rtc_dev)) {
index 7fc36973fa330e4845b84f688800d6903556de38..a7d49329d62668c271cacbfa3d15cee18bedbf7f 100644 (file)
@@ -28,7 +28,7 @@ static u32 starfire_get_time(void)
 static int starfire_read_time(struct device *dev, struct rtc_time *tm)
 {
        rtc_time_to_tm(starfire_get_time(), tm);
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static const struct rtc_class_ops starfire_rtc_ops = {
index a456cb6177ea46120c96d07fb0fcf77ad38d5226..e70b78d17a98b01c83521c2a13fe2e4594fb87a6 100644 (file)
@@ -129,10 +129,6 @@ static int stk17ta8_rtc_read_time(struct device *dev, struct rtc_time *tm)
        /* year is 1900 + tm->tm_year */
        tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
-       if (rtc_valid_tm(tm) < 0) {
-               dev_err(dev, "retrieved date/time is not valid.\n");
-               rtc_time_to_tm(0, tm);
-       }
        return 0;
 }
 
@@ -242,46 +238,30 @@ static const struct rtc_class_ops stk17ta8_rtc_ops = {
        .alarm_irq_enable       = stk17ta8_rtc_alarm_irq_enable,
 };
 
-static ssize_t stk17ta8_nvram_read(struct file *filp, struct kobject *kobj,
-                                struct bin_attribute *attr, char *buf,
-                                loff_t pos, size_t size)
+static int stk17ta8_nvram_read(void *priv, unsigned int pos, void *val,
+                              size_t bytes)
 {
-       struct device *dev = container_of(kobj, struct device, kobj);
-       struct platform_device *pdev = to_platform_device(dev);
-       struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+       struct rtc_plat_data *pdata = priv;
        void __iomem *ioaddr = pdata->ioaddr;
-       ssize_t count;
+       u8 *buf = val;
 
-       for (count = 0; count < size; count++)
+       for (; bytes; bytes--)
                *buf++ = readb(ioaddr + pos++);
-       return count;
+       return 0;
 }
 
-static ssize_t stk17ta8_nvram_write(struct file *filp, struct kobject *kobj,
-                                 struct bin_attribute *attr, char *buf,
-                                 loff_t pos, size_t size)
+static int stk17ta8_nvram_write(void *priv, unsigned int pos, void *val,
+                               size_t bytes)
 {
-       struct device *dev = container_of(kobj, struct device, kobj);
-       struct platform_device *pdev = to_platform_device(dev);
-       struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+       struct rtc_plat_data *pdata = priv;
        void __iomem *ioaddr = pdata->ioaddr;
-       ssize_t count;
+       u8 *buf = val;
 
-       for (count = 0; count < size; count++)
+       for (; bytes; bytes--)
                writeb(*buf++, ioaddr + pos++);
-       return count;
+       return 0;
 }
 
-static struct bin_attribute stk17ta8_nvram_attr = {
-       .attr = {
-               .name = "nvram",
-               .mode = S_IRUGO | S_IWUSR,
-       },
-       .size = RTC_OFFSET,
-       .read = stk17ta8_nvram_read,
-       .write = stk17ta8_nvram_write,
-};
-
 static int stk17ta8_rtc_probe(struct platform_device *pdev)
 {
        struct resource *res;
@@ -290,6 +270,14 @@ static int stk17ta8_rtc_probe(struct platform_device *pdev)
        struct rtc_plat_data *pdata;
        void __iomem *ioaddr;
        int ret = 0;
+       struct nvmem_config nvmem_cfg = {
+               .name = "stk17ta8_nvram",
+               .word_size = 1,
+               .stride = 1,
+               .size = RTC_OFFSET,
+               .reg_read = stk17ta8_nvram_read,
+               .reg_write = stk17ta8_nvram_write,
+       };
 
        pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
        if (!pdata)
@@ -328,24 +316,19 @@ static int stk17ta8_rtc_probe(struct platform_device *pdev)
                }
        }
 
-       pdata->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
-                                 &stk17ta8_rtc_ops, THIS_MODULE);
+       pdata->rtc = devm_rtc_allocate_device(&pdev->dev);
        if (IS_ERR(pdata->rtc))
                return PTR_ERR(pdata->rtc);
 
-       ret = sysfs_create_bin_file(&pdev->dev.kobj, &stk17ta8_nvram_attr);
+       pdata->rtc->ops = &stk17ta8_rtc_ops;
+       pdata->rtc->nvram_old_abi = true;
 
-       return ret;
-}
+       nvmem_cfg.priv = pdata;
+       ret = rtc_nvmem_register(pdata->rtc, &nvmem_cfg);
+       if (ret)
+               return ret;
 
-static int stk17ta8_rtc_remove(struct platform_device *pdev)
-{
-       struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
-
-       sysfs_remove_bin_file(&pdev->dev.kobj, &stk17ta8_nvram_attr);
-       if (pdata->irq > 0)
-               writeb(0, pdata->ioaddr + RTC_INTERRUPTS);
-       return 0;
+       return rtc_register_device(pdata->rtc);
 }
 
 /* work with hotplug and coldplug */
@@ -353,7 +336,6 @@ MODULE_ALIAS("platform:stk17ta8");
 
 static struct platform_driver stk17ta8_rtc_driver = {
        .probe          = stk17ta8_rtc_probe,
-       .remove         = stk17ta8_rtc_remove,
        .driver         = {
                .name   = "stk17ta8",
        },
index 5bc28eed1adf591a5c32f35e23968bf7b31f2ed5..2e6fb275acc82733de30da185cba99f83368acbb 100644 (file)
@@ -349,7 +349,7 @@ static int sun6i_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
         */
        rtc_tm->tm_year += SUN6I_YEAR_OFF;
 
-       return rtc_valid_tm(rtc_tm);
+       return 0;
 }
 
 static int sun6i_rtc_getalarm(struct device *dev, struct rtc_wkalrm *wkalrm)
index abada609ddc736c8fdb7af2b1009e44aafa544b4..dadbf8b324ad24366c1f38db5195de03ab344e42 100644 (file)
@@ -261,7 +261,7 @@ static int sunxi_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
         */
        rtc_tm->tm_year += SUNXI_YEAR_OFF(chip->data_year);
 
-       return rtc_valid_tm(rtc_tm);
+       return 0;
 }
 
 static int sunxi_rtc_setalarm(struct device *dev, struct rtc_wkalrm *wkalrm)
index 92ff2edb86a653afecda26f6ec9422715fb78e79..454da38c60122d3a91ffb3fef3369b7de3f6e203 100644 (file)
@@ -248,6 +248,14 @@ offset_store(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RW(offset);
 
+static ssize_t
+range_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       return sprintf(buf, "[%lld,%llu]\n", to_rtc_device(dev)->range_min,
+                      to_rtc_device(dev)->range_max);
+}
+static DEVICE_ATTR_RO(range);
+
 static struct attribute *rtc_attrs[] = {
        &dev_attr_name.attr,
        &dev_attr_date.attr,
@@ -257,6 +265,7 @@ static struct attribute *rtc_attrs[] = {
        &dev_attr_hctosys.attr,
        &dev_attr_wakealarm.attr,
        &dev_attr_offset.attr,
+       &dev_attr_range.attr,
        NULL,
 };
 
@@ -286,6 +295,9 @@ static umode_t rtc_attr_is_visible(struct kobject *kobj,
        } else if (attr == &dev_attr_offset.attr) {
                if (!rtc->ops->set_offset)
                        mode = 0;
+       } else if (attr == &dev_attr_range.attr) {
+               if (!(rtc->range_max - rtc->range_min))
+                       mode = 0;
        }
 
        return mode;
index d30d57b048d36ccb5c9ab3285e196d4e168207fc..66efff60c4d53d624834963bbd1a9dcd08c67a4a 100644 (file)
@@ -144,10 +144,6 @@ static int tegra_rtc_set_time(struct device *dev, struct rtc_time *tm)
        int ret;
 
        /* convert tm to seconds. */
-       ret = rtc_valid_tm(tm);
-       if (ret)
-               return ret;
-
        rtc_tm_to_time(tm, &sec);
 
        dev_vdbg(dev, "time set to %lu. %d/%d/%d %d:%02u:%02u\n",
index a3418a8a37965a909fc4a86cb1cd513d2c9f8c9a..d7785ae0a2b4e2e868d86c56631b557fa912f640 100644 (file)
@@ -90,7 +90,7 @@ static int tps6586x_rtc_read_time(struct device *dev, struct rtc_time *tm)
        seconds = ticks >> 10;
        seconds += rtc->epoch_start;
        rtc_time_to_tm(seconds, tm);
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int tps6586x_rtc_set_time(struct device *dev, struct rtc_time *tm)
index 560d9a5e02253fe3ef5cf159c71d94198498b45c..08dbefc79520e57093cc3fefc9629e2f4200b95a 100644 (file)
 #include <linux/module.h>
 #include <linux/io.h>
 #include <linux/gfp.h>
-#include <asm/txx9/tx4939.h>
+
+#define TX4939_RTCCTL_ALME     0x00000080
+#define TX4939_RTCCTL_ALMD     0x00000040
+#define TX4939_RTCCTL_BUSY     0x00000020
+
+#define TX4939_RTCCTL_COMMAND  0x00000007
+#define TX4939_RTCCTL_COMMAND_NOP      0x00000000
+#define TX4939_RTCCTL_COMMAND_GETTIME  0x00000001
+#define TX4939_RTCCTL_COMMAND_SETTIME  0x00000002
+#define TX4939_RTCCTL_COMMAND_GETALARM 0x00000003
+#define TX4939_RTCCTL_COMMAND_SETALARM 0x00000004
+
+#define TX4939_RTCTBC_PM       0x00000080
+#define TX4939_RTCTBC_COMP     0x0000007f
+
+#define TX4939_RTC_REG_RAMSIZE 0x00000100
+#define TX4939_RTC_REG_RWBSIZE 0x00000006
+
+struct tx4939_rtc_reg {
+       __u32 ctl;
+       __u32 adr;
+       __u32 dat;
+       __u32 tbc;
+};
 
 struct tx4939rtc_plat_data {
        struct rtc_device *rtc;
@@ -86,9 +109,10 @@ static int tx4939_rtc_read_time(struct device *dev, struct rtc_time *tm)
        for (i = 2; i < 6; i++)
                buf[i] = __raw_readl(&rtcreg->dat);
        spin_unlock_irq(&pdata->lock);
-       sec = (buf[5] << 24) | (buf[4] << 16) | (buf[3] << 8) | buf[2];
+       sec = ((unsigned long)buf[5] << 24) | (buf[4] << 16) |
+               (buf[3] << 8) | buf[2];
        rtc_time_to_tm(sec, tm);
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int tx4939_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
@@ -147,7 +171,8 @@ static int tx4939_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        alrm->enabled = (ctl & TX4939_RTCCTL_ALME) ? 1 : 0;
        alrm->pending = (ctl & TX4939_RTCCTL_ALMD) ? 1 : 0;
        spin_unlock_irq(&pdata->lock);
-       sec = (buf[5] << 24) | (buf[4] << 16) | (buf[3] << 8) | buf[2];
+       sec = ((unsigned long)buf[5] << 24) | (buf[4] << 16) |
+               (buf[3] << 8) | buf[2];
        rtc_time_to_tm(sec, &alrm->time);
        return rtc_valid_tm(&alrm->time);
 }
@@ -189,58 +214,52 @@ static const struct rtc_class_ops tx4939_rtc_ops = {
        .alarm_irq_enable       = tx4939_rtc_alarm_irq_enable,
 };
 
-static ssize_t tx4939_rtc_nvram_read(struct file *filp, struct kobject *kobj,
-                                    struct bin_attribute *bin_attr,
-                                    char *buf, loff_t pos, size_t size)
+static int tx4939_nvram_read(void *priv, unsigned int pos, void *val,
+                            size_t bytes)
 {
-       struct device *dev = container_of(kobj, struct device, kobj);
-       struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
+       struct tx4939rtc_plat_data *pdata = priv;
        struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
-       ssize_t count;
+       u8 *buf = val;
 
        spin_lock_irq(&pdata->lock);
-       for (count = 0; count < size; count++) {
+       for (; bytes; bytes--) {
                __raw_writel(pos++, &rtcreg->adr);
                *buf++ = __raw_readl(&rtcreg->dat);
        }
        spin_unlock_irq(&pdata->lock);
-       return count;
+       return 0;
 }
 
-static ssize_t tx4939_rtc_nvram_write(struct file *filp, struct kobject *kobj,
-                                     struct bin_attribute *bin_attr,
-                                     char *buf, loff_t pos, size_t size)
+static int tx4939_nvram_write(void *priv, unsigned int pos, void *val,
+                             size_t bytes)
 {
-       struct device *dev = container_of(kobj, struct device, kobj);
-       struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
+       struct tx4939rtc_plat_data *pdata = priv;
        struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
-       ssize_t count;
+       u8 *buf = val;
 
        spin_lock_irq(&pdata->lock);
-       for (count = 0; count < size; count++) {
+       for (; bytes; bytes--) {
                __raw_writel(pos++, &rtcreg->adr);
                __raw_writel(*buf++, &rtcreg->dat);
        }
        spin_unlock_irq(&pdata->lock);
-       return count;
+       return 0;
 }
 
-static struct bin_attribute tx4939_rtc_nvram_attr = {
-       .attr = {
-               .name = "nvram",
-               .mode = S_IRUGO | S_IWUSR,
-       },
-       .size = TX4939_RTC_REG_RAMSIZE,
-       .read = tx4939_rtc_nvram_read,
-       .write = tx4939_rtc_nvram_write,
-};
-
 static int __init tx4939_rtc_probe(struct platform_device *pdev)
 {
        struct rtc_device *rtc;
        struct tx4939rtc_plat_data *pdata;
        struct resource *res;
        int irq, ret;
+       struct nvmem_config nvmem_cfg = {
+               .name = "rv8803_nvram",
+               .word_size = 4,
+               .stride = 4,
+               .size = TX4939_RTC_REG_RAMSIZE,
+               .reg_read = tx4939_nvram_read,
+               .reg_write = tx4939_nvram_write,
+       };
 
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
@@ -260,21 +279,27 @@ static int __init tx4939_rtc_probe(struct platform_device *pdev)
        if (devm_request_irq(&pdev->dev, irq, tx4939_rtc_interrupt,
                             0, pdev->name, &pdev->dev) < 0)
                return -EBUSY;
-       rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
-                                 &tx4939_rtc_ops, THIS_MODULE);
+       rtc = devm_rtc_allocate_device(&pdev->dev);
        if (IS_ERR(rtc))
                return PTR_ERR(rtc);
+
+       rtc->ops = &tx4939_rtc_ops;
+       rtc->nvram_old_abi = true;
+
        pdata->rtc = rtc;
-       ret = sysfs_create_bin_file(&pdev->dev.kobj, &tx4939_rtc_nvram_attr);
 
-       return ret;
+       nvmem_cfg.priv = pdata;
+       ret = rtc_nvmem_register(rtc, &nvmem_cfg);
+       if (ret)
+               return ret;
+
+       return rtc_register_device(rtc);
 }
 
 static int __exit tx4939_rtc_remove(struct platform_device *pdev)
 {
        struct tx4939rtc_plat_data *pdata = platform_get_drvdata(pdev);
 
-       sysfs_remove_bin_file(&pdev->dev.kobj, &tx4939_rtc_nvram_attr);
        spin_lock_irq(&pdata->lock);
        tx4939_rtc_cmd(pdata->rtcreg, TX4939_RTCCTL_COMMAND_NOP);
        spin_unlock_irq(&pdata->lock);
index 75aea4c4d334bbaef61d8325762b223fa8735e8b..7b824dabf104a2cdfe635d46a88a3ab7d0486da2 100644 (file)
@@ -156,7 +156,7 @@ static int wm831x_rtc_readtime(struct device *dev, struct rtc_time *tm)
                        u32 time = (time1[0] << 16) | time1[1];
 
                        rtc_time_to_tm(time, tm);
-                       return rtc_valid_tm(tm);
+                       return 0;
                }
 
        } while (++count < WM831X_GET_TIME_RETRIES);
index 0c34d3b81279e535bbe027e77bce04443bd5b05a..153820876a820033cfebaf9cee45c5ae0162f8a6 100644 (file)
@@ -60,7 +60,7 @@ static int xgene_rtc_read_time(struct device *dev, struct rtc_time *tm)
        struct xgene_rtc_dev *pdata = dev_get_drvdata(dev);
 
        rtc_time_to_tm(readl(pdata->csr_base + RTC_CCVR), tm);
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int xgene_rtc_set_mmss(struct device *dev, unsigned long secs)
index da18a8ae3c1ddbb6f56925005019a2bd9a0d75ea..fba994dc31eb406f00c25fc60ee47b1463cf1a5d 100644 (file)
@@ -122,7 +122,7 @@ static int xlnx_rtc_read_time(struct device *dev, struct rtc_time *tm)
                rtc_time64_to_tm(read_time, tm);
        }
 
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int xlnx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
index 0c177647ea6c71c00d5df725beea9b83c7e4e79e..718293d7242630b409d797cab864f734694006f6 100644 (file)
@@ -20,7 +20,7 @@
  * cases.
  *
  * -EPROTO is returned if now.tv_nsec is not close enough to *target_nsec.
- (
+ *
  * If temporary failure is indicated the caller should try again 'soon'
  */
 int rtc_set_ntp_time(struct timespec64 now, unsigned long *target_nsec)
index 1444333210c74929479cdaf2b9b411b74eb74b95..9ac7574e3cfb16be04c213fa02d3fb988f0fa5d0 100644 (file)
@@ -15,8 +15,8 @@ config BLK_DEV_XPRAM
 
 config DCSSBLK
        def_tristate m
-       select DAX
        select FS_DAX_LIMITED
+       select DAX_DRIVER
        prompt "DCSSBLK support"
        depends on S390 && BLOCK
        help
index a993d19fa56240221ba9b030603bde8200d64fd5..5c4535b545cc9564529b2c76877a24ce8cff8aff 100644 (file)
@@ -37,7 +37,7 @@ config QCOM_PM
 
 config QCOM_QMI_HELPERS
        tristate
-       depends on ARCH_QCOM
+       depends on ARCH_QCOM && NET
        help
          Helper library for handling QMI encoded messages.  QMI encoded
          messages are used in communication between the majority of QRTR
index 08bd8549242a9d2445ac7aca845f716a97b54ac8..17b314d9a148cb24bf252be3d76ee835c53c4dda 100644 (file)
@@ -83,12 +83,14 @@ EXPORT_SYMBOL_GPL(qcom_mdt_get_size);
  * @mem_region:        allocated memory region to load firmware into
  * @mem_phys:  physical address of allocated memory region
  * @mem_size:  size of the allocated memory region
+ * @reloc_base:        adjusted physical address after relocation
  *
  * Returns 0 on success, negative errno otherwise.
  */
 int qcom_mdt_load(struct device *dev, const struct firmware *fw,
                  const char *firmware, int pas_id, void *mem_region,
-                 phys_addr_t mem_phys, size_t mem_size)
+                 phys_addr_t mem_phys, size_t mem_size,
+                 phys_addr_t *reloc_base)
 {
        const struct elf32_phdr *phdrs;
        const struct elf32_phdr *phdr;
@@ -192,6 +194,9 @@ int qcom_mdt_load(struct device *dev, const struct firmware *fw,
                        memset(ptr + phdr->p_filesz, 0, phdr->p_memsz - phdr->p_filesz);
        }
 
+       if (reloc_base)
+               *reloc_base = mem_reloc;
+
 out:
        kfree(fw_name);
 
index c43ac574274cba1dd2083de9195fa1538a1973cf..3075358f3f08b97e3ab9f66592bf05e2c1f21ffa 100644 (file)
@@ -69,7 +69,7 @@ blkcnt_t dirty_cnt(struct inode *inode)
        void          *results[1];
 
        if (inode->i_mapping)
-               cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->page_tree,
+               cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->i_pages,
                                                  results, 0, 1,
                                                  PAGECACHE_TAG_DIRTY);
        if (cnt == 0 && atomic_read(&vob->vob_mmap_cnt) > 0)
index 3b1c8e5a30537be432245bb5c4cc478772ef0bd6..8ee7b4d273b2ed0942d70feeb70e3201f8713f0c 100644 (file)
@@ -934,14 +934,14 @@ static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash,
        struct page *page;
        int found;
 
-       spin_lock_irq(&mapping->tree_lock);
-       found = radix_tree_gang_lookup(&mapping->page_tree,
+       xa_lock_irq(&mapping->i_pages);
+       found = radix_tree_gang_lookup(&mapping->i_pages,
                                       (void **)&page, offset, 1);
        if (found > 0 && !radix_tree_exceptional_entry(page)) {
                struct lu_dirpage *dp;
 
                get_page(page);
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
                /*
                 * In contrast to find_lock_page() we are sure that directory
                 * page cannot be truncated (while DLM lock is held) and,
@@ -989,7 +989,7 @@ static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash,
                        page = ERR_PTR(-EIO);
                }
        } else {
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
                page = NULL;
        }
        return page;
index 93753cb96180cff9c29b53d0446a2342d424b4a9..512fa87fa11bb2537c005b96efde8d3f0f64507a 100644 (file)
@@ -619,7 +619,7 @@ static const struct v4l2_ctrl_ops ctrl_ops = {
        .g_volatile_ctrl = gc0310_g_volatile_ctrl
 };
 
-struct v4l2_ctrl_config gc0310_controls[] = {
+static const struct v4l2_ctrl_config gc0310_controls[] = {
        {
         .ops = &ctrl_ops,
         .id = V4L2_CID_EXPOSURE_ABSOLUTE,
index 834fba8c4fa09ecb7d49eb00c4289eb55f691dbd..44db9f9f1fc5487f3428adf57a3ec535c09a71d9 100644 (file)
@@ -107,7 +107,7 @@ mt9m114_write_reg(struct i2c_client *client, u16 data_length, u16 reg, u32 val)
        int num_msg;
        struct i2c_msg msg;
        unsigned char data[6] = {0};
-       u16 *wreg;
+       __be16 *wreg;
        int retry = 0;
 
        if (!client->adapter) {
@@ -130,18 +130,20 @@ again:
        msg.buf = data;
 
        /* high byte goes out first */
-       wreg = (u16 *)data;
+       wreg = (void *)data;
        *wreg = cpu_to_be16(reg);
 
        if (data_length == MISENSOR_8BIT) {
                data[2] = (u8)(val);
        } else if (data_length == MISENSOR_16BIT) {
-               u16 *wdata = (u16 *)&data[2];
-               *wdata = be16_to_cpu((u16)val);
+               u16 *wdata = (void *)&data[2];
+
+               *wdata = be16_to_cpu(*(__be16 *)&data[2]);
        } else {
                /* MISENSOR_32BIT */
-               u32 *wdata = (u32 *)&data[2];
-               *wdata = be32_to_cpu(val);
+               u32 *wdata = (void *)&data[2];
+
+               *wdata = be32_to_cpu(*(__be32 *)&data[2]);
        }
 
        num_msg = i2c_transfer(client->adapter, &msg, 1);
@@ -245,6 +247,7 @@ static int __mt9m114_flush_reg_array(struct i2c_client *client,
        const int num_msg = 1;
        int ret;
        int retry = 0;
+       __be16 *data16 = (void *)&ctrl->buffer.addr;
 
        if (ctrl->index == 0)
                return 0;
@@ -253,7 +256,7 @@ again:
        msg.addr = client->addr;
        msg.flags = 0;
        msg.len = 2 + ctrl->index;
-       ctrl->buffer.addr = cpu_to_be16(ctrl->buffer.addr);
+       *data16 = cpu_to_be16(ctrl->buffer.addr);
        msg.buf = (u8 *)&ctrl->buffer;
 
        ret = i2c_transfer(client->adapter, &msg, num_msg);
@@ -282,8 +285,8 @@ static int __mt9m114_buf_reg_array(struct i2c_client *client,
                                   struct mt9m114_write_ctrl *ctrl,
                                   const struct misensor_reg *next)
 {
-       u16 *data16;
-       u32 *data32;
+       __be16 *data16;
+       __be32 *data32;
        int err;
 
        /* Insufficient buffer? Let's flush and get more free space. */
@@ -298,11 +301,11 @@ static int __mt9m114_buf_reg_array(struct i2c_client *client,
                ctrl->buffer.data[ctrl->index] = (u8)next->val;
                break;
        case MISENSOR_16BIT:
-               data16 = (u16 *)&ctrl->buffer.data[ctrl->index];
+               data16 = (__be16 *)&ctrl->buffer.data[ctrl->index];
                *data16 = cpu_to_be16((u16)next->val);
                break;
        case MISENSOR_32BIT:
-               data32 = (u32 *)&ctrl->buffer.data[ctrl->index];
+               data32 = (__be32 *)&ctrl->buffer.data[ctrl->index];
                *data32 = cpu_to_be32(next->val);
                break;
        default:
index 11412061c40e2662fbad46faadf397a5d61faa96..c0849299d5923067af1aa945db1b45557da18529 100644 (file)
@@ -94,9 +94,9 @@ static int ov2680_read_reg(struct i2c_client *client,
        if (data_length == OV2680_8BIT)
                *val = (u8)data[0];
        else if (data_length == OV2680_16BIT)
-               *val = be16_to_cpu(*(u16 *)&data[0]);
+               *val = be16_to_cpu(*(__be16 *)&data[0]);
        else
-               *val = be32_to_cpu(*(u32 *)&data[0]);
+               *val = be32_to_cpu(*(__be32 *)&data[0]);
        //dev_dbg(&client->dev,  "++++i2c read adr%x = %x\n", reg,*val);
        return 0;
 }
@@ -121,7 +121,7 @@ static int ov2680_write_reg(struct i2c_client *client, u16 data_length,
 {
        int ret;
        unsigned char data[4] = {0};
-       u16 *wreg = (u16 *)data;
+       __be16 *wreg = (void *)data;
        const u16 len = data_length + sizeof(u16); /* 16-bit address + data */
 
        if (data_length != OV2680_8BIT && data_length != OV2680_16BIT) {
@@ -137,7 +137,8 @@ static int ov2680_write_reg(struct i2c_client *client, u16 data_length,
                data[2] = (u8)(val);
        } else {
                /* OV2680_16BIT */
-               u16 *wdata = (u16 *)&data[2];
+               __be16 *wdata = (void *)&data[2];
+
                *wdata = cpu_to_be16(val);
        }
 
@@ -169,12 +170,13 @@ static int __ov2680_flush_reg_array(struct i2c_client *client,
                                    struct ov2680_write_ctrl *ctrl)
 {
        u16 size;
+       __be16 *data16 = (void *)&ctrl->buffer.addr;
 
        if (ctrl->index == 0)
                return 0;
 
        size = sizeof(u16) + ctrl->index; /* 16-bit address + data */
-       ctrl->buffer.addr = cpu_to_be16(ctrl->buffer.addr);
+       *data16 = cpu_to_be16(ctrl->buffer.addr);
        ctrl->index = 0;
 
        return ov2680_i2c_write(client, size, (u8 *)&ctrl->buffer);
@@ -185,7 +187,7 @@ static int __ov2680_buf_reg_array(struct i2c_client *client,
                                  const struct ov2680_reg *next)
 {
        int size;
-       u16 *data16;
+       __be16 *data16;
 
        switch (next->type) {
        case OV2680_8BIT:
@@ -194,7 +196,7 @@ static int __ov2680_buf_reg_array(struct i2c_client *client,
                break;
        case OV2680_16BIT:
                size = 2;
-               data16 = (u16 *)&ctrl->buffer.data[ctrl->index];
+               data16 = (void *)&ctrl->buffer.data[ctrl->index];
                *data16 = cpu_to_be16((u16)next->val);
                break;
        default:
@@ -722,7 +724,7 @@ static const struct v4l2_ctrl_ops ctrl_ops = {
        .g_volatile_ctrl = ov2680_g_volatile_ctrl
 };
 
-struct v4l2_ctrl_config ov2680_controls[] = {
+static const struct v4l2_ctrl_config ov2680_controls[] = {
        {
         .ops = &ctrl_ops,
         .id = V4L2_CID_EXPOSURE_ABSOLUTE,
index e59358ac89ce62aaedb63f4e24fed2f6cec02749..a362eebd882f7b7b9c1252c5e37d60bbe8b5bf44 100644 (file)
@@ -85,9 +85,9 @@ static int ov2722_read_reg(struct i2c_client *client,
        if (data_length == OV2722_8BIT)
                *val = (u8)data[0];
        else if (data_length == OV2722_16BIT)
-               *val = be16_to_cpu(*(u16 *)&data[0]);
+               *val = be16_to_cpu(*(__be16 *)&data[0]);
        else
-               *val = be32_to_cpu(*(u32 *)&data[0]);
+               *val = be32_to_cpu(*(__be32 *)&data[0]);
 
        return 0;
 }
@@ -112,7 +112,7 @@ static int ov2722_write_reg(struct i2c_client *client, u16 data_length,
 {
        int ret;
        unsigned char data[4] = {0};
-       u16 *wreg = (u16 *)data;
+       __be16 *wreg = (__be16 *)data;
        const u16 len = data_length + sizeof(u16); /* 16-bit address + data */
 
        if (data_length != OV2722_8BIT && data_length != OV2722_16BIT) {
@@ -128,7 +128,8 @@ static int ov2722_write_reg(struct i2c_client *client, u16 data_length,
                data[2] = (u8)(val);
        } else {
                /* OV2722_16BIT */
-               u16 *wdata = (u16 *)&data[2];
+               __be16 *wdata = (__be16 *)&data[2];
+
                *wdata = cpu_to_be16(val);
        }
 
@@ -160,12 +161,13 @@ static int __ov2722_flush_reg_array(struct i2c_client *client,
                                    struct ov2722_write_ctrl *ctrl)
 {
        u16 size;
+       __be16 *data16 = (void *)&ctrl->buffer.addr;
 
        if (ctrl->index == 0)
                return 0;
 
        size = sizeof(u16) + ctrl->index; /* 16-bit address + data */
-       ctrl->buffer.addr = cpu_to_be16(ctrl->buffer.addr);
+       *data16 = cpu_to_be16(ctrl->buffer.addr);
        ctrl->index = 0;
 
        return ov2722_i2c_write(client, size, (u8 *)&ctrl->buffer);
@@ -176,7 +178,7 @@ static int __ov2722_buf_reg_array(struct i2c_client *client,
                                  const struct ov2722_reg *next)
 {
        int size;
-       u16 *data16;
+       __be16 *data16;
 
        switch (next->type) {
        case OV2722_8BIT:
@@ -185,7 +187,7 @@ static int __ov2722_buf_reg_array(struct i2c_client *client,
                break;
        case OV2722_16BIT:
                size = 2;
-               data16 = (u16 *)&ctrl->buffer.data[ctrl->index];
+               data16 = (void *)&ctrl->buffer.data[ctrl->index];
                *data16 = cpu_to_be16((u16)next->val);
                break;
        default:
@@ -569,7 +571,7 @@ static const struct v4l2_ctrl_ops ctrl_ops = {
        .g_volatile_ctrl = ov2722_g_volatile_ctrl
 };
 
-struct v4l2_ctrl_config ov2722_controls[] = {
+static const struct v4l2_ctrl_config ov2722_controls[] = {
        {
         .ops = &ctrl_ops,
         .id = V4L2_CID_EXPOSURE_ABSOLUTE,
index af6b11f6e5e77bd49453f9bb81210077d9017d86..70c252c5163c622dd80fcfd0240016c84c286a68 100644 (file)
@@ -377,8 +377,7 @@ static struct gc0310_reg const gc0310_VGA_30fps[] = {
        {GC0310_TOK_TERM, 0, 0},
 };
 
-
-struct gc0310_resolution gc0310_res_preview[] = {
+static struct gc0310_resolution gc0310_res_preview[] = {
        {
                .desc = "gc0310_VGA_30fps",
                .width = 656, // 648,
index 028b04aaaa8f5ca28d35af3ed4d872bc3a196e6d..757b37613ccc9cbed5dc0db68fcbffba8670b705 100644 (file)
@@ -1096,7 +1096,7 @@ static struct ov2722_reg const ov2722_720p_30fps[] = {
        {OV2722_TOK_TERM, 0, 0},
 };
 
-struct ov2722_resolution ov2722_res_preview[] = {
+static struct ov2722_resolution ov2722_res_preview[] = {
        {
                .desc = "ov2722_1632_1092_30fps",
                .width = 1632,
index 30a735e59e548f673386453404272c296a903ab5..714297c36b3e97da463b6eba3c21c8c0e80a74fe 100644 (file)
@@ -173,9 +173,9 @@ static int ov5693_read_reg(struct i2c_client *client,
        if (data_length == OV5693_8BIT)
                *val = (u8)data[0];
        else if (data_length == OV5693_16BIT)
-               *val = be16_to_cpu(*(u16 *)&data[0]);
+               *val = be16_to_cpu(*(__be16 *)&data[0]);
        else
-               *val = be32_to_cpu(*(u32 *)&data[0]);
+               *val = be32_to_cpu(*(__be32 *)&data[0]);
 
        return 0;
 }
@@ -200,13 +200,13 @@ static int vcm_dw_i2c_write(struct i2c_client *client, u16 data)
        struct i2c_msg msg;
        const int num_msg = 1;
        int ret;
-       u16 val;
+       __be16 val;
 
        val = cpu_to_be16(data);
        msg.addr = VCM_ADDR;
        msg.flags = 0;
        msg.len = OV5693_16BIT;
-       msg.buf = (u8 *)&val;
+       msg.buf = (void *)&val;
 
        ret = i2c_transfer(client->adapter, &msg, 1);
 
@@ -263,7 +263,7 @@ static int ov5693_write_reg(struct i2c_client *client, u16 data_length,
 {
        int ret;
        unsigned char data[4] = {0};
-       u16 *wreg = (u16 *)data;
+       __be16 *wreg = (void *)data;
        const u16 len = data_length + sizeof(u16); /* 16-bit address + data */
 
        if (data_length != OV5693_8BIT && data_length != OV5693_16BIT) {
@@ -279,7 +279,8 @@ static int ov5693_write_reg(struct i2c_client *client, u16 data_length,
                data[2] = (u8)(val);
        } else {
                /* OV5693_16BIT */
-               u16 *wdata = (u16 *)&data[2];
+               __be16 *wdata = (void *)&data[2];
+
                *wdata = cpu_to_be16(val);
        }
 
@@ -311,15 +312,17 @@ static int __ov5693_flush_reg_array(struct i2c_client *client,
                                    struct ov5693_write_ctrl *ctrl)
 {
        u16 size;
+       __be16 *reg = (void *)&ctrl->buffer.addr;
 
        if (ctrl->index == 0)
                return 0;
 
        size = sizeof(u16) + ctrl->index; /* 16-bit address + data */
-       ctrl->buffer.addr = cpu_to_be16(ctrl->buffer.addr);
+
+       *reg = cpu_to_be16(ctrl->buffer.addr);
        ctrl->index = 0;
 
-       return ov5693_i2c_write(client, size, (u8 *)&ctrl->buffer);
+       return ov5693_i2c_write(client, size, (u8 *)reg);
 }
 
 static int __ov5693_buf_reg_array(struct i2c_client *client,
@@ -327,7 +330,7 @@ static int __ov5693_buf_reg_array(struct i2c_client *client,
                                  const struct ov5693_reg *next)
 {
        int size;
-       u16 *data16;
+       __be16 *data16;
 
        switch (next->type) {
        case OV5693_8BIT:
@@ -336,7 +339,8 @@ static int __ov5693_buf_reg_array(struct i2c_client *client,
                break;
        case OV5693_16BIT:
                size = 2;
-               data16 = (u16 *)&ctrl->buffer.data[ctrl->index];
+
+               data16 = (void *)&ctrl->buffer.data[ctrl->index];
                *data16 = cpu_to_be16((u16)next->val);
                break;
        default:
@@ -951,7 +955,7 @@ static int ad5823_t_focus_vcm(struct v4l2_subdev *sd, u16 val)
        return ret;
 }
 
-int ad5823_t_focus_abs(struct v4l2_subdev *sd, s32 value)
+static int ad5823_t_focus_abs(struct v4l2_subdev *sd, s32 value)
 {
        value = min(value, AD5823_MAX_FOCUS_POS);
        return ad5823_t_focus_vcm(sd, value);
@@ -1132,7 +1136,7 @@ static const struct v4l2_ctrl_ops ctrl_ops = {
        .g_volatile_ctrl = ov5693_g_volatile_ctrl
 };
 
-struct v4l2_ctrl_config ov5693_controls[] = {
+static const struct v4l2_ctrl_config ov5693_controls[] = {
        {
         .ops = &ctrl_ops,
         .id = V4L2_CID_EXPOSURE_ABSOLUTE,
index 6d27dd849a621d5dfba9ba7c4d928b549f5c0e94..9058a82455a60c348eb7d54f6ae2f3e65a01e364 100644 (file)
@@ -1087,7 +1087,7 @@ static struct ov5693_reg const ov5693_2576x1936_30fps[] = {
        {OV5693_TOK_TERM, 0, 0}
 };
 
-struct ov5693_resolution ov5693_res_preview[] = {
+static struct ov5693_resolution ov5693_res_preview[] = {
        {
                .desc = "ov5693_736x496_30fps",
                .width = 736,
index e0f0c379e7ce125e72d5acb3329acd79b9a94115..aa5e294e7b7dc5429c7e1867a99a0bd4df22ff1c 100644 (file)
@@ -104,6 +104,10 @@ enum atomisp_input_format {
        ATOMISP_INPUT_FORMAT_USER_DEF8,  /* User defined 8-bit data type 8 */
 };
 
+#define N_ATOMISP_INPUT_FORMAT (ATOMISP_INPUT_FORMAT_USER_DEF8 + 1)
+
+
+
 enum intel_v4l2_subdev_type {
        RAW_CAMERA = 1,
        SOC_CAMERA = 2,
index 83f816faba1bc1a387331311db219ea093b0117a..7fead5fc9a7df48a047a8a7231be4d87a0d3e269 100644 (file)
@@ -59,17 +59,14 @@ atomisp-objs += \
        css2400/isp/kernels/bnr/bnr_1.0/ia_css_bnr.host.o \
        css2400/isp/kernels/bnr/bnr2_2/ia_css_bnr2_2.host.o \
        css2400/isp/kernels/dpc2/ia_css_dpc2.host.o \
-       css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.o \
        css2400/isp/kernels/fc/fc_1.0/ia_css_formats.host.o \
        css2400/isp/kernels/ctc/ctc_1.0/ia_css_ctc.host.o \
        css2400/isp/kernels/ctc/ctc_1.0/ia_css_ctc_table.host.o \
        css2400/isp/kernels/ctc/ctc2/ia_css_ctc2.host.o \
        css2400/isp/kernels/ctc/ctc1_5/ia_css_ctc1_5.host.o \
        css2400/isp/kernels/bh/bh_2/ia_css_bh.host.o \
-       css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.o \
        css2400/isp/kernels/bnlm/ia_css_bnlm.host.o \
        css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.o \
-       css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.o \
        css2400/isp/kernels/dvs/dvs_1.0/ia_css_dvs.host.o \
        css2400/isp/kernels/anr/anr_1.0/ia_css_anr.host.o \
        css2400/isp/kernels/anr/anr_2/ia_css_anr2_table.host.o \
@@ -96,7 +93,6 @@ atomisp-objs += \
        css2400/isp/kernels/ob/ob2/ia_css_ob2.host.o \
        css2400/isp/kernels/iterator/iterator_1.0/ia_css_iterator.host.o \
        css2400/isp/kernels/wb/wb_1.0/ia_css_wb.host.o \
-       css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.o \
        css2400/isp/kernels/eed1_8/ia_css_eed1_8.host.o \
        css2400/isp/kernels/sc/sc_1.0/ia_css_sc.host.o \
        css2400/isp/kernels/ipu2_io_ls/bayer_io_ls/ia_css_bayer_io.host.o \
index 22f2dbcecc150c162ce58edf22c59ec7abef3a34..fa6ea506f8b192f42747b9627a43cbf1ec07fbc0 100644 (file)
@@ -437,7 +437,7 @@ static void atomisp_reset_event(struct atomisp_sub_device *asd)
 }
 
 
-static void print_csi_rx_errors(enum ia_css_csi2_port port,
+static void print_csi_rx_errors(enum mipi_port_id port,
                                struct atomisp_device *isp)
 {
        u32 infos = 0;
@@ -481,7 +481,7 @@ static void clear_irq_reg(struct atomisp_device *isp)
 }
 
 static struct atomisp_sub_device *
-__get_asd_from_port(struct atomisp_device *isp, mipi_port_ID_t port)
+__get_asd_from_port(struct atomisp_device *isp, enum mipi_port_id port)
 {
        int i;
 
@@ -515,7 +515,7 @@ irqreturn_t atomisp_isr(int irq, void *dev)
 
        spin_lock_irqsave(&isp->lock, flags);
        if (isp->sw_contex.power_state != ATOM_ISP_POWER_UP ||
-           isp->css_initialized == false) {
+           !isp->css_initialized) {
                spin_unlock_irqrestore(&isp->lock, flags);
                return IRQ_HANDLED;
        }
@@ -570,9 +570,9 @@ irqreturn_t atomisp_isr(int irq, void *dev)
            (irq_infos & CSS_IRQ_INFO_IF_ERROR)) {
                /* handle mipi receiver error */
                u32 rx_infos;
-               enum ia_css_csi2_port port;
+               enum mipi_port_id port;
 
-               for (port = IA_CSS_CSI2_PORT0; port <= IA_CSS_CSI2_PORT2;
+               for (port = MIPI_PORT0_ID; port <= MIPI_PORT2_ID;
                     port++) {
                        print_csi_rx_errors(port, isp);
                        atomisp_css_rx_get_irq_info(port, &rx_infos);
@@ -4603,7 +4603,7 @@ int atomisp_fixed_pattern(struct atomisp_sub_device *asd, int flag,
        }
 
        if (*value == 0) {
-               asd->params.fpn_en = 0;
+               asd->params.fpn_en = false;
                return 0;
        }
 
@@ -5028,7 +5028,7 @@ atomisp_try_fmt_file(struct atomisp_device *isp, struct v4l2_format *f)
        return 0;
 }
 
-mipi_port_ID_t __get_mipi_port(struct atomisp_device *isp,
+enum mipi_port_id __get_mipi_port(struct atomisp_device *isp,
                                enum atomisp_camera_port port)
 {
        switch (port) {
@@ -5162,22 +5162,22 @@ static int __enable_continuous_mode(struct atomisp_sub_device *asd,
        return atomisp_update_run_mode(asd);
 }
 
-int configure_pp_input_nop(struct atomisp_sub_device *asd,
-                          unsigned int width, unsigned int height)
+static int configure_pp_input_nop(struct atomisp_sub_device *asd,
+                                 unsigned int width, unsigned int height)
 {
        return 0;
 }
 
-int configure_output_nop(struct atomisp_sub_device *asd,
-                        unsigned int width, unsigned int height,
-                        unsigned int min_width,
-                        enum atomisp_css_frame_format sh_fmt)
+static int configure_output_nop(struct atomisp_sub_device *asd,
+                               unsigned int width, unsigned int height,
+                               unsigned int min_width,
+                               enum atomisp_css_frame_format sh_fmt)
 {
        return 0;
 }
 
-int get_frame_info_nop(struct atomisp_sub_device *asd,
-                      struct atomisp_css_frame_info *finfo)
+static int get_frame_info_nop(struct atomisp_sub_device *asd,
+                             struct atomisp_css_frame_info *finfo)
 {
        return 0;
 }
@@ -5524,7 +5524,7 @@ static void atomisp_get_dis_envelop(struct atomisp_sub_device *asd,
 
        /* if subdev type is SOC camera,we do not need to set DVS */
        if (isp->inputs[asd->input_curr].type == SOC_CAMERA)
-               asd->params.video_dis_en = 0;
+               asd->params.video_dis_en = false;
 
        if (asd->params.video_dis_en &&
            asd->run_mode->val == ATOMISP_RUN_MODE_VIDEO) {
@@ -5624,7 +5624,7 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev,
                        ffmt = req_ffmt;
                        dev_warn(isp->dev,
                          "can not enable video dis due to sensor limitation.");
-                       asd->params.video_dis_en = 0;
+                       asd->params.video_dis_en = false;
                }
        }
        dev_dbg(isp->dev, "sensor width: %d, height: %d\n",
@@ -5649,7 +5649,7 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev,
            (ffmt->width < req_ffmt->width || ffmt->height < req_ffmt->height)) {
                dev_warn(isp->dev,
                         "can not enable video dis due to sensor limitation.");
-               asd->params.video_dis_en = 0;
+               asd->params.video_dis_en = false;
        }
 
        atomisp_subdev_set_ffmt(&asd->subdev, fh.pad,
@@ -6152,7 +6152,7 @@ int atomisp_set_shading_table(struct atomisp_sub_device *asd,
 
        if (!user_shading_table->enable) {
                atomisp_css_set_shading_table(asd, NULL);
-               asd->params.sc_en = 0;
+               asd->params.sc_en = false;
                return 0;
        }
 
@@ -6190,7 +6190,7 @@ int atomisp_set_shading_table(struct atomisp_sub_device *asd,
        free_table = asd->params.css_param.shading_table;
        asd->params.css_param.shading_table = shading_table;
        atomisp_css_set_shading_table(asd, shading_table);
-       asd->params.sc_en = 1;
+       asd->params.sc_en = true;
 
 out:
        if (free_table != NULL)
@@ -6627,7 +6627,7 @@ int atomisp_inject_a_fake_event(struct atomisp_sub_device *asd, int *event)
        return 0;
 }
 
-int atomisp_get_pipe_id(struct atomisp_video_pipe *pipe)
+static int atomisp_get_pipe_id(struct atomisp_video_pipe *pipe)
 {
        struct atomisp_sub_device *asd = pipe->asd;
 
index bdc73862fb7928ddafcb4e03b40eeb12d86df222..79d493dba40375202137cc720659b7158d768703 100644 (file)
@@ -389,7 +389,7 @@ int atomisp_source_pad_to_stream_id(struct atomisp_sub_device *asd,
  */
 void atomisp_eof_event(struct atomisp_sub_device *asd, uint8_t exp_id);
 
-mipi_port_ID_t __get_mipi_port(struct atomisp_device *isp,
+enum mipi_port_id __get_mipi_port(struct atomisp_device *isp,
                                enum atomisp_camera_port port);
 
 bool atomisp_is_vf_pipe(struct atomisp_video_pipe *pipe);
index 3ef850cd25bdc09ef151f9f1a712325662bddaa3..6c829d0a1e4cef033a3cb56672c09a80c78a9743 100644 (file)
@@ -148,10 +148,10 @@ void atomisp_css_init_struct(struct atomisp_sub_device *asd);
 int atomisp_css_irq_translate(struct atomisp_device *isp,
                              unsigned int *infos);
 
-void atomisp_css_rx_get_irq_info(enum ia_css_csi2_port port,
+void atomisp_css_rx_get_irq_info(enum mipi_port_id port,
                                        unsigned int *infos);
 
-void atomisp_css_rx_clear_irq_info(enum ia_css_csi2_port port,
+void atomisp_css_rx_clear_irq_info(enum mipi_port_id port,
                                        unsigned int infos);
 
 int atomisp_css_irq_enable(struct atomisp_device *isp,
@@ -182,8 +182,6 @@ void atomisp_css_mmu_invalidate_cache(void);
 
 void atomisp_css_mmu_invalidate_tlb(void);
 
-void atomisp_css_mmu_set_page_table_base_index(unsigned long base_index);
-
 int atomisp_css_start(struct atomisp_sub_device *asd,
                      enum atomisp_css_pipe_id pipe_id, bool in_reset);
 
@@ -255,7 +253,7 @@ void atomisp_css_isys_set_valid(struct atomisp_sub_device *asd,
 
 void atomisp_css_isys_set_format(struct atomisp_sub_device *asd,
                                 enum atomisp_input_stream_id stream_id,
-                                enum atomisp_css_stream_format format,
+                                enum atomisp_input_format format,
                                 int isys_stream);
 
 int atomisp_css_set_default_isys_config(struct atomisp_sub_device *asd,
@@ -264,18 +262,18 @@ int atomisp_css_set_default_isys_config(struct atomisp_sub_device *asd,
 
 int atomisp_css_isys_two_stream_cfg(struct atomisp_sub_device *asd,
                                    enum atomisp_input_stream_id stream_id,
-                                   enum atomisp_css_stream_format input_format);
+                                   enum atomisp_input_format input_format);
 
 void atomisp_css_isys_two_stream_cfg_update_stream1(
                                    struct atomisp_sub_device *asd,
                                    enum atomisp_input_stream_id stream_id,
-                                   enum atomisp_css_stream_format input_format,
+                                   enum atomisp_input_format input_format,
                                    unsigned int width, unsigned int height);
 
 void atomisp_css_isys_two_stream_cfg_update_stream2(
                                    struct atomisp_sub_device *asd,
                                    enum atomisp_input_stream_id stream_id,
-                                   enum atomisp_css_stream_format input_format,
+                                   enum atomisp_input_format input_format,
                                    unsigned int width, unsigned int height);
 
 int atomisp_css_input_set_resolution(struct atomisp_sub_device *asd,
@@ -292,7 +290,7 @@ void atomisp_css_input_set_bayer_order(struct atomisp_sub_device *asd,
 
 void atomisp_css_input_set_format(struct atomisp_sub_device *asd,
                                enum atomisp_input_stream_id stream_id,
-                               enum atomisp_css_stream_format format);
+                               enum atomisp_input_format format);
 
 int atomisp_css_input_set_effective_resolution(
                                        struct atomisp_sub_device *asd,
@@ -334,11 +332,11 @@ void atomisp_css_enable_cvf(struct atomisp_sub_device *asd,
                                                        bool enable);
 
 int atomisp_css_input_configure_port(struct atomisp_sub_device *asd,
-                               mipi_port_ID_t port,
+                               enum mipi_port_id port,
                                unsigned int num_lanes,
                                unsigned int timeout,
                                unsigned int mipi_freq,
-                               enum atomisp_css_stream_format metadata_format,
+                               enum atomisp_input_format metadata_format,
                                unsigned int metadata_width,
                                unsigned int metadata_height);
 
index 7621b45371472bc70ef756ca2810e14795740b6e..f668c68dc33adeb5131b4c6b2e6b5921dc8fada5 100644 (file)
@@ -88,7 +88,7 @@ unsigned int atomisp_css_debug_get_dtrace_level(void)
        return ia_css_debug_trace_level;
 }
 
-void atomisp_css2_hw_store_8(hrt_address addr, uint8_t data)
+static void atomisp_css2_hw_store_8(hrt_address addr, uint8_t data)
 {
        unsigned long flags;
 
@@ -126,7 +126,7 @@ static uint8_t atomisp_css2_hw_load_8(hrt_address addr)
        return ret;
 }
 
-uint16_t atomisp_css2_hw_load_16(hrt_address addr)
+static uint16_t atomisp_css2_hw_load_16(hrt_address addr)
 {
        unsigned long flags;
        uint16_t ret;
@@ -136,7 +136,8 @@ uint16_t atomisp_css2_hw_load_16(hrt_address addr)
        spin_unlock_irqrestore(&mmio_lock, flags);
        return ret;
 }
-uint32_t atomisp_css2_hw_load_32(hrt_address addr)
+
+static uint32_t atomisp_css2_hw_load_32(hrt_address addr)
 {
        unsigned long flags;
        uint32_t ret;
@@ -1019,7 +1020,7 @@ int atomisp_css_irq_translate(struct atomisp_device *isp,
        return 0;
 }
 
-void atomisp_css_rx_get_irq_info(enum ia_css_csi2_port port,
+void atomisp_css_rx_get_irq_info(enum mipi_port_id port,
                                        unsigned int *infos)
 {
 #ifndef ISP2401_NEW_INPUT_SYSTEM
@@ -1029,7 +1030,7 @@ void atomisp_css_rx_get_irq_info(enum ia_css_csi2_port port,
 #endif
 }
 
-void atomisp_css_rx_clear_irq_info(enum ia_css_csi2_port port,
+void atomisp_css_rx_clear_irq_info(enum mipi_port_id port,
                                        unsigned int infos)
 {
 #ifndef ISP2401_NEW_INPUT_SYSTEM
@@ -1159,31 +1160,6 @@ void atomisp_css_mmu_invalidate_tlb(void)
        ia_css_mmu_invalidate_cache();
 }
 
-void atomisp_css_mmu_set_page_table_base_index(unsigned long base_index)
-{
-}
-
-/*
- * Check whether currently running MIPI buffer size fulfill
- * the requirement of the stream to be run
- */
-bool __need_realloc_mipi_buffer(struct atomisp_device *isp)
-{
-       unsigned int i;
-
-       for (i = 0; i < isp->num_of_streams; i++) {
-               struct atomisp_sub_device *asd = &isp->asd[i];
-
-               if (asd->streaming !=
-                               ATOMISP_DEVICE_STREAMING_ENABLED)
-                       continue;
-               if (asd->mipi_frame_size < isp->mipi_frame_size)
-                       return true;
-       }
-
-       return false;
-}
-
 int atomisp_css_start(struct atomisp_sub_device *asd,
                        enum atomisp_css_pipe_id pipe_id, bool in_reset)
 {
@@ -1808,7 +1784,7 @@ void atomisp_css_isys_set_valid(struct atomisp_sub_device *asd,
 
 void atomisp_css_isys_set_format(struct atomisp_sub_device *asd,
                                 enum atomisp_input_stream_id stream_id,
-                                enum atomisp_css_stream_format format,
+                                enum atomisp_input_format format,
                                 int isys_stream)
 {
 
@@ -1820,7 +1796,7 @@ void atomisp_css_isys_set_format(struct atomisp_sub_device *asd,
 
 void atomisp_css_input_set_format(struct atomisp_sub_device *asd,
                                        enum atomisp_input_stream_id stream_id,
-                                       enum atomisp_css_stream_format format)
+                                       enum atomisp_input_format format)
 {
 
        struct ia_css_stream_config *s_config =
@@ -1859,7 +1835,7 @@ int atomisp_css_set_default_isys_config(struct atomisp_sub_device *asd,
 
 int atomisp_css_isys_two_stream_cfg(struct atomisp_sub_device *asd,
                                    enum atomisp_input_stream_id stream_id,
-                                   enum atomisp_css_stream_format input_format)
+                                   enum atomisp_input_format input_format)
 {
        struct ia_css_stream_config *s_config =
                &asd->stream_env[stream_id].stream_config;
@@ -1873,9 +1849,9 @@ int atomisp_css_isys_two_stream_cfg(struct atomisp_sub_device *asd,
        s_config->isys_config[IA_CSS_STREAM_ISYS_STREAM_1].linked_isys_stream_id
                = IA_CSS_STREAM_ISYS_STREAM_0;
        s_config->isys_config[IA_CSS_STREAM_ISYS_STREAM_0].format =
-               IA_CSS_STREAM_FORMAT_USER_DEF1;
+               ATOMISP_INPUT_FORMAT_USER_DEF1;
        s_config->isys_config[IA_CSS_STREAM_ISYS_STREAM_1].format =
-               IA_CSS_STREAM_FORMAT_USER_DEF2;
+               ATOMISP_INPUT_FORMAT_USER_DEF2;
        s_config->isys_config[IA_CSS_STREAM_ISYS_STREAM_1].valid = true;
        return 0;
 }
@@ -1883,7 +1859,7 @@ int atomisp_css_isys_two_stream_cfg(struct atomisp_sub_device *asd,
 void atomisp_css_isys_two_stream_cfg_update_stream1(
                                    struct atomisp_sub_device *asd,
                                    enum atomisp_input_stream_id stream_id,
-                                   enum atomisp_css_stream_format input_format,
+                                   enum atomisp_input_format input_format,
                                    unsigned int width, unsigned int height)
 {
        struct ia_css_stream_config *s_config =
@@ -1901,7 +1877,7 @@ void atomisp_css_isys_two_stream_cfg_update_stream1(
 void atomisp_css_isys_two_stream_cfg_update_stream2(
                                    struct atomisp_sub_device *asd,
                                    enum atomisp_input_stream_id stream_id,
-                                   enum atomisp_css_stream_format input_format,
+                                   enum atomisp_input_format input_format,
                                    unsigned int width, unsigned int height)
 {
        struct ia_css_stream_config *s_config =
@@ -2142,11 +2118,11 @@ void atomisp_css_enable_cvf(struct atomisp_sub_device *asd,
 
 int atomisp_css_input_configure_port(
                struct atomisp_sub_device *asd,
-               mipi_port_ID_t port,
+               enum mipi_port_id port,
                unsigned int num_lanes,
                unsigned int timeout,
                unsigned int mipi_freq,
-               enum atomisp_css_stream_format metadata_format,
+               enum atomisp_input_format metadata_format,
                unsigned int metadata_width,
                unsigned int metadata_height)
 {
@@ -2890,8 +2866,8 @@ stream_err:
        return -EINVAL;
 }
 
-unsigned int atomisp_get_pipe_index(struct atomisp_sub_device *asd,
-                                       uint16_t source_pad)
+static unsigned int atomisp_get_pipe_index(struct atomisp_sub_device *asd,
+                                          uint16_t source_pad)
 {
        struct atomisp_device *isp = asd->isp;
        /*
index b03711668eda1de85cd6a63cf57d290063fd5aff..a06c5b6e8027d9e3ffc3cc6afac40d8bf0b8e6f3 100644 (file)
@@ -37,7 +37,6 @@
 #define atomisp_css_irq_info  ia_css_irq_info
 #define atomisp_css_isp_config ia_css_isp_config
 #define atomisp_css_bayer_order ia_css_bayer_order
-#define atomisp_css_stream_format ia_css_stream_format
 #define atomisp_css_capture_mode ia_css_capture_mode
 #define atomisp_css_input_mode ia_css_input_mode
 #define atomisp_css_frame ia_css_frame
  */
 #define CSS_ID(val)    (IA_ ## val)
 #define CSS_EVENT(val) (IA_CSS_EVENT_TYPE_ ## val)
-#define CSS_FORMAT(val)        (IA_CSS_STREAM_FORMAT_ ## val)
+#define CSS_FORMAT(val)        (ATOMISP_INPUT_FORMAT_ ## val)
 
 #define CSS_EVENT_PORT_EOF     CSS_EVENT(PORT_EOF)
 #define CSS_EVENT_FRAME_TAGGED CSS_EVENT(FRAME_TAGGED)
index ceedb82b6beb76cc9646878916d6284ebc2ddcd6..a815c768bda945256e58c71f1c152d67e2845784 100644 (file)
@@ -22,6 +22,7 @@
 #include "atomisp_compat.h"
 #include "atomisp_internal.h"
 #include "atomisp_ioctl.h"
+#include "atomisp_drvfs.h"
 #include "hmm/hmm.h"
 
 /*
index 545ef024841d0f5f92063e2aee32480757131f8f..709137f25700807059eddc368bd55a69d5910189 100644 (file)
@@ -689,7 +689,7 @@ static void atomisp_dev_init_struct(struct atomisp_device *isp)
 {
        unsigned int i;
 
-       isp->sw_contex.file_input = 0;
+       isp->sw_contex.file_input = false;
        isp->need_gfx_throttle = true;
        isp->isp_fatal_error = false;
        isp->mipi_frame_size = 0;
@@ -708,12 +708,12 @@ static void atomisp_subdev_init_struct(struct atomisp_sub_device *asd)
        v4l2_ctrl_s_ctrl(asd->run_mode, ATOMISP_RUN_MODE_STILL_CAPTURE);
        memset(&asd->params.css_param, 0, sizeof(asd->params.css_param));
        asd->params.color_effect = V4L2_COLORFX_NONE;
-       asd->params.bad_pixel_en = 1;
-       asd->params.gdc_cac_en = 0;
-       asd->params.video_dis_en = 0;
-       asd->params.sc_en = 0;
-       asd->params.fpn_en = 0;
-       asd->params.xnr_en = 0;
+       asd->params.bad_pixel_en = true;
+       asd->params.gdc_cac_en = false;
+       asd->params.video_dis_en = false;
+       asd->params.sc_en = false;
+       asd->params.fpn_en = false;
+       asd->params.xnr_en = false;
        asd->params.false_color = 0;
        asd->params.online_process = 1;
        asd->params.yuv_ds_en = 0;
index 5c84dd63778ed9dfcc299ca7d7d7d0ccb80c1c2a..61bd550dafb96068d1752e3f9fb6c455ac608e11 100644 (file)
@@ -1607,10 +1607,12 @@ int atomisp_stream_on_master_slave_sensor(struct atomisp_device *isp,
 
 /* FIXME! */
 #ifndef ISP2401
-void __wdt_on_master_slave_sensor(struct atomisp_device *isp, unsigned int wdt_duration)
+static void __wdt_on_master_slave_sensor(struct atomisp_device *isp,
+                                        unsigned int wdt_duration)
 #else
-void __wdt_on_master_slave_sensor(struct atomisp_video_pipe *pipe,
-                               unsigned int wdt_duration, bool enable)
+static void __wdt_on_master_slave_sensor(struct atomisp_video_pipe *pipe,
+                                        unsigned int wdt_duration,
+                                        bool enable)
 #endif
 {
 #ifndef ISP2401
@@ -2731,7 +2733,7 @@ static int atomisp_s_parm_file(struct file *file, void *fh,
        }
 
        rt_mutex_lock(&isp->mutex);
-       isp->sw_contex.file_input = 1;
+       isp->sw_contex.file_input = true;
        rt_mutex_unlock(&isp->mutex);
 
        return 0;
index b78276ac22dafa493c39eeb8aedbfdae8aba6741..49a9973b4289a814733126dd2d7c891197d2cb02 100644 (file)
@@ -42,17 +42,17 @@ const struct atomisp_in_fmt_conv atomisp_in_fmt_conv[] = {
        { MEDIA_BUS_FMT_SGBRG12_1X12, 12, 12, ATOMISP_INPUT_FORMAT_RAW_12, CSS_BAYER_ORDER_GBRG, CSS_FORMAT_RAW_12 },
        { MEDIA_BUS_FMT_SGRBG12_1X12, 12, 12, ATOMISP_INPUT_FORMAT_RAW_12, CSS_BAYER_ORDER_GRBG, CSS_FORMAT_RAW_12 },
        { MEDIA_BUS_FMT_SRGGB12_1X12, 12, 12, ATOMISP_INPUT_FORMAT_RAW_12, CSS_BAYER_ORDER_RGGB, CSS_FORMAT_RAW_12 },
-       { MEDIA_BUS_FMT_UYVY8_1X16, 8, 8, ATOMISP_INPUT_FORMAT_YUV422_8, 0, IA_CSS_STREAM_FORMAT_YUV422_8 },
-       { MEDIA_BUS_FMT_YUYV8_1X16, 8, 8, ATOMISP_INPUT_FORMAT_YUV422_8, 0, IA_CSS_STREAM_FORMAT_YUV422_8 },
-       { MEDIA_BUS_FMT_JPEG_1X8, 8, 8, CSS_FRAME_FORMAT_BINARY_8, 0, IA_CSS_STREAM_FORMAT_BINARY_8 },
+       { MEDIA_BUS_FMT_UYVY8_1X16, 8, 8, ATOMISP_INPUT_FORMAT_YUV422_8, 0, ATOMISP_INPUT_FORMAT_YUV422_8 },
+       { MEDIA_BUS_FMT_YUYV8_1X16, 8, 8, ATOMISP_INPUT_FORMAT_YUV422_8, 0, ATOMISP_INPUT_FORMAT_YUV422_8 },
+       { MEDIA_BUS_FMT_JPEG_1X8, 8, 8, CSS_FRAME_FORMAT_BINARY_8, 0, ATOMISP_INPUT_FORMAT_BINARY_8 },
        { V4L2_MBUS_FMT_CUSTOM_NV12, 12, 12, CSS_FRAME_FORMAT_NV12, 0, CSS_FRAME_FORMAT_NV12 },
        { V4L2_MBUS_FMT_CUSTOM_NV21, 12, 12, CSS_FRAME_FORMAT_NV21, 0, CSS_FRAME_FORMAT_NV21 },
-       { V4L2_MBUS_FMT_CUSTOM_YUV420, 12, 12, ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY, 0, IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY },
+       { V4L2_MBUS_FMT_CUSTOM_YUV420, 12, 12, ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY, 0, ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY },
 #if 0
-       { V4L2_MBUS_FMT_CUSTOM_M10MO_RAW, 8, 8, CSS_FRAME_FORMAT_BINARY_8, 0, IA_CSS_STREAM_FORMAT_BINARY_8 },
+       { V4L2_MBUS_FMT_CUSTOM_M10MO_RAW, 8, 8, CSS_FRAME_FORMAT_BINARY_8, 0, ATOMISP_INPUT_FORMAT_BINARY_8 },
 #endif
        /* no valid V4L2 MBUS code for metadata format, so leave it 0. */
-       { 0, 0, 0, ATOMISP_INPUT_FORMAT_EMBEDDED, 0, IA_CSS_STREAM_FORMAT_EMBEDDED },
+       { 0, 0, 0, ATOMISP_INPUT_FORMAT_EMBEDDED, 0, ATOMISP_INPUT_FORMAT_EMBEDDED },
        {}
 };
 
@@ -101,7 +101,7 @@ const struct atomisp_in_fmt_conv *atomisp_find_in_fmt_conv(u32 code)
 }
 
 const struct atomisp_in_fmt_conv *atomisp_find_in_fmt_conv_by_atomisp_in_fmt(
-       enum atomisp_css_stream_format atomisp_in_fmt)
+       enum atomisp_input_format atomisp_in_fmt)
 {
        int i;
 
index c3eba675da065d8833495b310f22f5dcc6805ca9..59ff8723c182781f38c54407a7633b30f57885c0 100644 (file)
@@ -58,9 +58,9 @@ struct atomisp_in_fmt_conv {
        u32     code;
        uint8_t bpp; /* bits per pixel */
        uint8_t depth; /* uncompressed */
-       enum atomisp_css_stream_format atomisp_in_fmt;
+       enum atomisp_input_format atomisp_in_fmt;
        enum atomisp_css_bayer_order bayer_order;
-       enum ia_css_stream_format css_stream_fmt;
+       enum atomisp_input_format css_stream_fmt;
 };
 
 struct atomisp_sub_device;
@@ -424,10 +424,10 @@ bool atomisp_subdev_is_compressed(u32 code);
 const struct atomisp_in_fmt_conv *atomisp_find_in_fmt_conv(u32 code);
 #ifndef ISP2401
 const struct atomisp_in_fmt_conv *atomisp_find_in_fmt_conv_by_atomisp_in_fmt(
-       enum atomisp_css_stream_format atomisp_in_fmt);
+       enum atomisp_input_format atomisp_in_fmt);
 #else
 const struct atomisp_in_fmt_conv
-    *atomisp_find_in_fmt_conv_by_atomisp_in_fmt(enum atomisp_css_stream_format
+    *atomisp_find_in_fmt_conv_by_atomisp_in_fmt(enum atomisp_input_format
                                                atomisp_in_fmt);
 #endif
 const struct atomisp_in_fmt_conv *atomisp_find_in_fmt_conv_compressed(u32 code);
index a8c27676a38b353a67853dceecea81c737601390..5ab48f346790b3d7d45e2b7a6e82da8f5a306b60 100644 (file)
@@ -116,7 +116,7 @@ extern bool ia_css_util_resolution_is_even(
  *
  */
 extern unsigned int ia_css_util_input_format_bpp(
-       enum ia_css_stream_format stream_format,
+       enum atomisp_input_format stream_format,
        bool two_ppc);
 
 /* @brief check if input format it raw
@@ -126,7 +126,7 @@ extern unsigned int ia_css_util_input_format_bpp(
  *
  */
 extern bool ia_css_util_is_input_format_raw(
-       enum ia_css_stream_format stream_format);
+       enum atomisp_input_format stream_format);
 
 /* @brief check if input format it yuv
  *
@@ -135,7 +135,7 @@ extern bool ia_css_util_is_input_format_raw(
  *
  */
 extern bool ia_css_util_is_input_format_yuv(
-       enum ia_css_stream_format stream_format);
+       enum atomisp_input_format stream_format);
 
 #endif /* __IA_CSS_UTIL_H__ */
 
index 54193789a809e36f1b8a2e3412ec30eda389641c..91e586112332903ba9757c430f64e6db5153d5c3 100644 (file)
@@ -52,55 +52,55 @@ enum ia_css_err ia_css_convert_errno(
 
 /* MW: Table look-up ??? */
 unsigned int ia_css_util_input_format_bpp(
-       enum ia_css_stream_format format,
+       enum atomisp_input_format format,
        bool two_ppc)
 {
        unsigned int rval = 0;
        switch (format) {
-       case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
-       case IA_CSS_STREAM_FORMAT_YUV420_8:
-       case IA_CSS_STREAM_FORMAT_YUV422_8:
-       case IA_CSS_STREAM_FORMAT_RGB_888:
-       case IA_CSS_STREAM_FORMAT_RAW_8:
-       case IA_CSS_STREAM_FORMAT_BINARY_8:
-       case IA_CSS_STREAM_FORMAT_EMBEDDED:
+       case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
+       case ATOMISP_INPUT_FORMAT_YUV420_8:
+       case ATOMISP_INPUT_FORMAT_YUV422_8:
+       case ATOMISP_INPUT_FORMAT_RGB_888:
+       case ATOMISP_INPUT_FORMAT_RAW_8:
+       case ATOMISP_INPUT_FORMAT_BINARY_8:
+       case ATOMISP_INPUT_FORMAT_EMBEDDED:
                rval = 8;
                break;
-       case IA_CSS_STREAM_FORMAT_YUV420_10:
-       case IA_CSS_STREAM_FORMAT_YUV422_10:
-       case IA_CSS_STREAM_FORMAT_RAW_10:
+       case ATOMISP_INPUT_FORMAT_YUV420_10:
+       case ATOMISP_INPUT_FORMAT_YUV422_10:
+       case ATOMISP_INPUT_FORMAT_RAW_10:
                rval = 10;
                break;
-       case IA_CSS_STREAM_FORMAT_YUV420_16:
-       case IA_CSS_STREAM_FORMAT_YUV422_16:
+       case ATOMISP_INPUT_FORMAT_YUV420_16:
+       case ATOMISP_INPUT_FORMAT_YUV422_16:
                rval = 16;
                break;
-       case IA_CSS_STREAM_FORMAT_RGB_444:
+       case ATOMISP_INPUT_FORMAT_RGB_444:
                rval = 4;
                break;
-       case IA_CSS_STREAM_FORMAT_RGB_555:
+       case ATOMISP_INPUT_FORMAT_RGB_555:
                rval = 5;
                break;
-       case IA_CSS_STREAM_FORMAT_RGB_565:
+       case ATOMISP_INPUT_FORMAT_RGB_565:
                rval = 65;
                break;
-       case IA_CSS_STREAM_FORMAT_RGB_666:
-       case IA_CSS_STREAM_FORMAT_RAW_6:
+       case ATOMISP_INPUT_FORMAT_RGB_666:
+       case ATOMISP_INPUT_FORMAT_RAW_6:
                rval = 6;
                break;
-       case IA_CSS_STREAM_FORMAT_RAW_7:
+       case ATOMISP_INPUT_FORMAT_RAW_7:
                rval = 7;
                break;
-       case IA_CSS_STREAM_FORMAT_RAW_12:
+       case ATOMISP_INPUT_FORMAT_RAW_12:
                rval = 12;
                break;
-       case IA_CSS_STREAM_FORMAT_RAW_14:
+       case ATOMISP_INPUT_FORMAT_RAW_14:
                if (two_ppc)
                        rval = 14;
                else
                        rval = 12;
                break;
-       case IA_CSS_STREAM_FORMAT_RAW_16:
+       case ATOMISP_INPUT_FORMAT_RAW_16:
                if (two_ppc)
                        rval = 16;
                else
@@ -175,28 +175,28 @@ bool ia_css_util_resolution_is_even(const struct ia_css_resolution resolution)
 }
 
 #endif
-bool ia_css_util_is_input_format_raw(enum ia_css_stream_format format)
+bool ia_css_util_is_input_format_raw(enum atomisp_input_format format)
 {
-       return ((format == IA_CSS_STREAM_FORMAT_RAW_6) ||
-               (format == IA_CSS_STREAM_FORMAT_RAW_7) ||
-               (format == IA_CSS_STREAM_FORMAT_RAW_8) ||
-               (format == IA_CSS_STREAM_FORMAT_RAW_10) ||
-               (format == IA_CSS_STREAM_FORMAT_RAW_12));
+       return ((format == ATOMISP_INPUT_FORMAT_RAW_6) ||
+               (format == ATOMISP_INPUT_FORMAT_RAW_7) ||
+               (format == ATOMISP_INPUT_FORMAT_RAW_8) ||
+               (format == ATOMISP_INPUT_FORMAT_RAW_10) ||
+               (format == ATOMISP_INPUT_FORMAT_RAW_12));
        /* raw_14 and raw_16 are not supported as input formats to the ISP.
         * They can only be copied to a frame in memory using the
         * copy binary.
         */
 }
 
-bool ia_css_util_is_input_format_yuv(enum ia_css_stream_format format)
+bool ia_css_util_is_input_format_yuv(enum atomisp_input_format format)
 {
-       return format == IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY ||
-           format == IA_CSS_STREAM_FORMAT_YUV420_8  ||
-           format == IA_CSS_STREAM_FORMAT_YUV420_10 ||
-           format == IA_CSS_STREAM_FORMAT_YUV420_16 ||
-           format == IA_CSS_STREAM_FORMAT_YUV422_8  ||
-           format == IA_CSS_STREAM_FORMAT_YUV422_10 ||
-           format == IA_CSS_STREAM_FORMAT_YUV422_16;
+       return format == ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY ||
+           format == ATOMISP_INPUT_FORMAT_YUV420_8  ||
+           format == ATOMISP_INPUT_FORMAT_YUV420_10 ||
+           format == ATOMISP_INPUT_FORMAT_YUV420_16 ||
+           format == ATOMISP_INPUT_FORMAT_YUV422_8  ||
+           format == ATOMISP_INPUT_FORMAT_YUV422_10 ||
+           format == ATOMISP_INPUT_FORMAT_YUV422_16;
 }
 
 enum ia_css_err ia_css_util_check_input(
index d2e3a2deea2ecf373b4a05a2584bec76855e059a..7907f0ff6d6c32e4a76831ca96b40d37e1604dac 100644 (file)
@@ -284,12 +284,12 @@ typedef enum {
        N_RX_ID
 } rx_ID_t;
 
-typedef enum {
+enum mipi_port_id {
        MIPI_PORT0_ID = 0,
        MIPI_PORT1_ID,
        MIPI_PORT2_ID,
        N_MIPI_PORT_ID
-} mipi_port_ID_t;
+};
 
 #define        N_RX_CHANNEL_ID         4
 
index c412810887b34d90a222868b7b45e49e386f8708..dcb9a3127cfe0733fcf7b5ec6ab732d8d3d66f7e 100644 (file)
@@ -29,7 +29,7 @@
 hrt_address    debug_buffer_address = (hrt_address)-1;
 hrt_vaddress   debug_buffer_ddr_address = (hrt_vaddress)-1;
 /* The local copy */
-debug_data_t           debug_data;
+static debug_data_t            debug_data;
 debug_data_t           *debug_data_ptr = &debug_data;
 
 void debug_buffer_init(const hrt_address addr)
index bcfd443f52022100158da33ed55c1ecc0d37dc0e..b6b1344786b15e50f2770a9dce808c26714543fe 100644 (file)
@@ -29,7 +29,7 @@ gp_timer_reg_load(uint32_t reg);
 static void
 gp_timer_reg_store(uint32_t reg, uint32_t value);
 
-uint32_t
+static uint32_t
 gp_timer_reg_load(uint32_t reg)
 {
        return ia_css_device_load_uint32(
index a8997e45738e322898876e387b92ca9cbeaeaabf..0e1ca995fb06f0d9c3be89eff1a0eea6af86f437 100644 (file)
@@ -45,8 +45,9 @@ const uint8_t HIVE_IF_SWITCH_CODE[N_INPUT_FORMATTER_ID] = {
        HIVE_INPUT_SWITCH_SELECT_STR_TO_MEM};
 
 /* MW Should be part of system_global.h, where we have the main enumeration */
-const bool HIVE_IF_BIN_COPY[N_INPUT_FORMATTER_ID] = {
-       false, false, false, true};
+static const bool HIVE_IF_BIN_COPY[N_INPUT_FORMATTER_ID] = {
+       false, false, false, true
+};
 
 void input_formatter_rst(
        const input_formatter_ID_t              ID)
index bd6821e436b26263ccd448beb9f9a76ce44a7934..2515e162828febc35f35b33d607fc0553460e189 100644 (file)
@@ -29,7 +29,7 @@
 #define ZERO (0x0)
 #define ONE  (1U)
 
-const ib_buffer_t   IB_BUFFER_NULL = {0 ,0, 0 };
+static const ib_buffer_t   IB_BUFFER_NULL = {0 ,0, 0 };
 
 static input_system_error_t input_system_configure_channel(
        const channel_cfg_t             channel);
@@ -98,7 +98,7 @@ static inline void ctrl_unit_get_state(
 
 static inline void mipi_port_get_state(
        const rx_ID_t                                   ID,
-       const mipi_port_ID_t                    port_ID,
+       const enum mipi_port_id                 port_ID,
        mipi_port_state_t                               *state);
 
 static inline void rx_channel_get_state(
@@ -180,7 +180,7 @@ void receiver_get_state(
        const rx_ID_t                           ID,
        receiver_state_t                        *state)
 {
-       mipi_port_ID_t  port_id;
+       enum mipi_port_id       port_id;
        unsigned int    ch_id;
 
        assert(ID < N_RX_ID);
@@ -209,7 +209,7 @@ void receiver_get_state(
        state->raw16 = (uint16_t)receiver_reg_load(ID,
                _HRT_CSS_RECEIVER_RAW16_REG_IDX);
 
-       for (port_id = (mipi_port_ID_t)0; port_id < N_MIPI_PORT_ID; port_id++) {
+       for (port_id = (enum mipi_port_id)0; port_id < N_MIPI_PORT_ID; port_id++) {
                mipi_port_get_state(ID, port_id,
                        &(state->mipi_port_state[port_id]));
        }
@@ -305,7 +305,7 @@ void receiver_set_compression(
 
 void receiver_port_enable(
        const rx_ID_t                   ID,
-       const mipi_port_ID_t            port_ID,
+       const enum mipi_port_id         port_ID,
        const bool                      cnd)
 {
        hrt_data        reg = receiver_port_reg_load(ID, port_ID,
@@ -324,7 +324,7 @@ void receiver_port_enable(
 
 bool is_receiver_port_enabled(
        const rx_ID_t                   ID,
-       const mipi_port_ID_t            port_ID)
+       const enum mipi_port_id         port_ID)
 {
        hrt_data        reg = receiver_port_reg_load(ID, port_ID,
                _HRT_CSS_RECEIVER_DEVICE_READY_REG_IDX);
@@ -333,7 +333,7 @@ bool is_receiver_port_enabled(
 
 void receiver_irq_enable(
        const rx_ID_t                   ID,
-       const mipi_port_ID_t            port_ID,
+       const enum mipi_port_id         port_ID,
        const rx_irq_info_t             irq_info)
 {
        receiver_port_reg_store(ID,
@@ -343,7 +343,7 @@ void receiver_irq_enable(
 
 rx_irq_info_t receiver_get_irq_info(
        const rx_ID_t                   ID,
-       const mipi_port_ID_t            port_ID)
+       const enum mipi_port_id         port_ID)
 {
        return receiver_port_reg_load(ID,
        port_ID, _HRT_CSS_RECEIVER_IRQ_STATUS_REG_IDX);
@@ -351,7 +351,7 @@ rx_irq_info_t receiver_get_irq_info(
 
 void receiver_irq_clear(
        const rx_ID_t                   ID,
-       const mipi_port_ID_t            port_ID,
+       const enum mipi_port_id         port_ID,
        const rx_irq_info_t             irq_info)
 {
        receiver_port_reg_store(ID,
@@ -556,7 +556,7 @@ static inline void ctrl_unit_get_state(
 
 static inline void mipi_port_get_state(
        const rx_ID_t                           ID,
-       const mipi_port_ID_t                    port_ID,
+       const enum mipi_port_id                 port_ID,
        mipi_port_state_t                       *state)
 {
        int     i;
@@ -644,12 +644,12 @@ static inline void rx_channel_get_state(
 }
 
 // MW: "2400" in the name is not good, but this is to avoid a naming conflict
-input_system_cfg2400_t config;
+static input_system_cfg2400_t config;
 
 static void receiver_rst(
        const rx_ID_t                           ID)
 {
-       mipi_port_ID_t          port_id;
+       enum mipi_port_id               port_id;
 
        assert(ID < N_RX_ID);
 
index 3e8bd00082dcd4a4cc099216bdbdab32aefcf802..bf9230fd08f235ca5c6e47df003ca3f9d8d0cf3b 100644 (file)
@@ -353,7 +353,7 @@ typedef struct rx_cfg_s             rx_cfg_t;
  */
 struct rx_cfg_s {
        rx_mode_t                       mode;   /* The HW config */
-       mipi_port_ID_t          port;   /* The port ID to apply the control on */
+       enum mipi_port_id               port;   /* The port ID to apply the control on */
        unsigned int            timeout;
        unsigned int            initcount;
        unsigned int            synccount;
index 118185eb86e9bd44d392c8d138fa3ace9ece6d36..48876bb08b7080fb2acdf37221c9e15c5cfc4ae2 100644 (file)
@@ -63,7 +63,7 @@ STORAGE_CLASS_INPUT_SYSTEM_C hrt_data receiver_reg_load(
 
 STORAGE_CLASS_INPUT_SYSTEM_C void receiver_port_reg_store(
        const rx_ID_t                           ID,
-       const mipi_port_ID_t                    port_ID,
+       const enum mipi_port_id                 port_ID,
        const hrt_address                       reg,
        const hrt_data                          value)
 {
@@ -77,7 +77,7 @@ STORAGE_CLASS_INPUT_SYSTEM_C void receiver_port_reg_store(
 
 STORAGE_CLASS_INPUT_SYSTEM_C hrt_data receiver_port_reg_load(
        const rx_ID_t                           ID,
-       const mipi_port_ID_t                    port_ID,
+       const enum mipi_port_id                 port_ID,
        const hrt_address                       reg)
 {
        assert(ID < N_RX_ID);
index d803efd7400ab3dce8fbbad7fe67320be19b469a..6f63962a54e8fc091b808beeaca3b033dd805ce7 100644 (file)
@@ -266,12 +266,12 @@ typedef enum {
        N_RX_ID
 } rx_ID_t;
 
-typedef enum {
+enum mipi_port_id {
        MIPI_PORT0_ID = 0,
        MIPI_PORT1_ID,
        MIPI_PORT2_ID,
        N_MIPI_PORT_ID
-} mipi_port_ID_t;
+};
 
 #define        N_RX_CHANNEL_ID         4
 
index 1596757fe9ef86ce565f1ffa37a10f1cb10fa1f9..6e37ff0fe0f911dcb8e71279f21735c484361860 100644 (file)
@@ -83,7 +83,7 @@ extern void receiver_set_compression(
  */
 extern void receiver_port_enable(
        const rx_ID_t                           ID,
-       const mipi_port_ID_t            port_ID,
+       const enum mipi_port_id         port_ID,
        const bool                                      cnd);
 
 /*! Flag if PORT[port_ID] of RECEIVER[ID] is enabled
@@ -95,7 +95,7 @@ extern void receiver_port_enable(
  */
 extern bool is_receiver_port_enabled(
        const rx_ID_t                           ID,
-       const mipi_port_ID_t            port_ID);
+       const enum mipi_port_id         port_ID);
 
 /*! Enable the IRQ channels of PORT[port_ID] of RECEIVER[ID]
 
@@ -107,7 +107,7 @@ extern bool is_receiver_port_enabled(
  */
 extern void receiver_irq_enable(
        const rx_ID_t                           ID,
-       const mipi_port_ID_t            port_ID,
+       const enum mipi_port_id         port_ID,
        const rx_irq_info_t                     irq_info);
 
 /*! Return the IRQ status of PORT[port_ID] of RECEIVER[ID]
@@ -119,7 +119,7 @@ extern void receiver_irq_enable(
  */
 extern rx_irq_info_t receiver_get_irq_info(
        const rx_ID_t                           ID,
-       const mipi_port_ID_t            port_ID);
+       const enum mipi_port_id         port_ID);
 
 /*! Clear the IRQ status of PORT[port_ID] of RECEIVER[ID]
 
@@ -131,7 +131,7 @@ extern rx_irq_info_t receiver_get_irq_info(
  */
 extern void receiver_irq_clear(
        const rx_ID_t                           ID,
-       const mipi_port_ID_t                    port_ID,
+       const enum mipi_port_id                 port_ID,
        const rx_irq_info_t                     irq_info);
 
 /*! Write to a control register of INPUT_SYSTEM[ID]
@@ -195,7 +195,7 @@ STORAGE_CLASS_INPUT_SYSTEM_H hrt_data receiver_reg_load(
  */
 STORAGE_CLASS_INPUT_SYSTEM_H void receiver_port_reg_store(
        const rx_ID_t                           ID,
-       const mipi_port_ID_t                    port_ID,
+       const enum mipi_port_id                 port_ID,
        const hrt_address                       reg,
        const hrt_data                          value);
 
@@ -210,7 +210,7 @@ STORAGE_CLASS_INPUT_SYSTEM_H void receiver_port_reg_store(
  */
 STORAGE_CLASS_INPUT_SYSTEM_H hrt_data receiver_port_reg_load(
        const rx_ID_t                           ID,
-       const mipi_port_ID_t            port_ID,
+       const enum mipi_port_id         port_ID,
        const hrt_address                       reg);
 
 /*! Write to a control register of SUB_SYSTEM[sub_ID] of INPUT_SYSTEM[ID]
index f415570a3da98a3e4080cb46b8c11270ec3a762c..ad9ca5449369c6805cc230e55cf8bdc1e6d0ca22 100644 (file)
@@ -12,6 +12,9 @@
  * more details.
  */
 
+/* For MIPI_PORT0_ID to MIPI_PORT2_ID */
+#include "system_global.h"
+
 #ifndef __IA_CSS_INPUT_PORT_H
 #define __IA_CSS_INPUT_PORT_H
 
  * This file contains information about the possible input ports for CSS
  */
 
-/* Enumeration of the physical input ports on the CSS hardware.
- *  There are 3 MIPI CSI-2 ports.
- */
-enum ia_css_csi2_port {
-       IA_CSS_CSI2_PORT0, /* Implicitly map to MIPI_PORT0_ID */
-       IA_CSS_CSI2_PORT1, /* Implicitly map to MIPI_PORT1_ID */
-       IA_CSS_CSI2_PORT2  /* Implicitly map to MIPI_PORT2_ID */
-};
-
 /* Backward compatible for CSS API 2.0 only
  *  TO BE REMOVED when all drivers move to CSS API 2.1
  */
-#define        IA_CSS_CSI2_PORT_4LANE IA_CSS_CSI2_PORT0
-#define        IA_CSS_CSI2_PORT_1LANE IA_CSS_CSI2_PORT1
-#define        IA_CSS_CSI2_PORT_2LANE IA_CSS_CSI2_PORT2
+#define        IA_CSS_CSI2_PORT_4LANE MIPI_PORT0_ID
+#define        IA_CSS_CSI2_PORT_1LANE MIPI_PORT1_ID
+#define        IA_CSS_CSI2_PORT_2LANE MIPI_PORT2_ID
 
 /* The CSI2 interface supports 2 types of compression or can
  *  be run without compression.
@@ -56,7 +50,7 @@ struct ia_css_csi2_compression {
 /* Input port structure.
  */
 struct ia_css_input_port {
-       enum ia_css_csi2_port port; /** Physical CSI-2 port */
+       enum mipi_port_id port; /** Physical CSI-2 port */
        unsigned int num_lanes; /** Number of lanes used (4-lane port only) */
        unsigned int timeout;   /** Timeout value */
        unsigned int rxcount;   /** Register value, should include all lanes */
index 10ef61178bb2bae95a1e65d873c973f75da2b7b2..c8840138899ac16360832bb6ddc4680b8ba49291 100644 (file)
@@ -186,7 +186,7 @@ ia_css_rx_get_irq_info(unsigned int *irq_bits);
  * that occurred.
  */
 void
-ia_css_rx_port_get_irq_info(enum ia_css_csi2_port port, unsigned int *irq_bits);
+ia_css_rx_port_get_irq_info(enum mipi_port_id port, unsigned int *irq_bits);
 
 /* @brief Clear CSI receiver error info.
  *
@@ -218,7 +218,7 @@ ia_css_rx_clear_irq_info(unsigned int irq_bits);
  * error bits get overwritten.
  */
 void
-ia_css_rx_port_clear_irq_info(enum ia_css_csi2_port port, unsigned int irq_bits);
+ia_css_rx_port_clear_irq_info(enum mipi_port_id port, unsigned int irq_bits);
 
 /* @brief Enable or disable specific interrupts.
  *
index 8b674c98224c2b73111c664c071fd1f442557100..ed0b6ab371dac00e43816676d95a0e8d68975969 100644 (file)
@@ -27,8 +27,8 @@
  *  to process sensor metadata.
  */
 struct ia_css_metadata_config {
-       enum ia_css_stream_format data_type; /** Data type of CSI-2 embedded
-                       data. The default value is IA_CSS_STREAM_FORMAT_EMBEDDED. For
+       enum atomisp_input_format data_type; /** Data type of CSI-2 embedded
+                       data. The default value is ATOMISP_INPUT_FORMAT_EMBEDDED. For
                        certain sensors, user can choose non-default data type for embedded
                        data. */
        struct ia_css_resolution  resolution; /** Resolution */
index f9c9cd76be97c7c52318701bbf413b8a9ff7c2f9..367b2aafa5e83f04b3709966fbc453457f35a3c0 100644 (file)
@@ -55,7 +55,7 @@ ia_css_mipi_frame_specify(const unsigned int  size_mem_words,
  *
  */
 enum ia_css_err
-ia_css_mipi_frame_enable_check_on_size(const enum ia_css_csi2_port port,
+ia_css_mipi_frame_enable_check_on_size(const enum mipi_port_id port,
                                const unsigned int      size_mem_words);
 #endif
 
@@ -74,7 +74,7 @@ ia_css_mipi_frame_enable_check_on_size(const enum ia_css_csi2_port port,
 enum ia_css_err
 ia_css_mipi_frame_calculate_size(const unsigned int width,
                                const unsigned int height,
-                               const enum ia_css_stream_format format,
+                               const enum atomisp_input_format format,
                                const bool hasSOLandEOL,
                                const unsigned int embedded_data_size_words,
                                unsigned int *size_mem_words);
index f7e9020a86e14d979015ba46c87554d7e7efe30b..f97b9eb2b19c23ca32d62e309cd2d54a8a6cf889 100644 (file)
  */
 
 #include <type_support.h> /* bool */
-
-/* The ISP streaming input interface supports the following formats.
- *  These match the corresponding MIPI formats.
- */
-enum ia_css_stream_format {
-       IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY,    /** 8 bits per subpixel */
-       IA_CSS_STREAM_FORMAT_YUV420_8,  /** 8 bits per subpixel */
-       IA_CSS_STREAM_FORMAT_YUV420_10, /** 10 bits per subpixel */
-       IA_CSS_STREAM_FORMAT_YUV420_16, /** 16 bits per subpixel */
-       IA_CSS_STREAM_FORMAT_YUV422_8,  /** UYVY..UYVY, 8 bits per subpixel */
-       IA_CSS_STREAM_FORMAT_YUV422_10, /** UYVY..UYVY, 10 bits per subpixel */
-       IA_CSS_STREAM_FORMAT_YUV422_16, /** UYVY..UYVY, 16 bits per subpixel */
-       IA_CSS_STREAM_FORMAT_RGB_444,  /** BGR..BGR, 4 bits per subpixel */
-       IA_CSS_STREAM_FORMAT_RGB_555,  /** BGR..BGR, 5 bits per subpixel */
-       IA_CSS_STREAM_FORMAT_RGB_565,  /** BGR..BGR, 5 bits B and R, 6 bits G */
-       IA_CSS_STREAM_FORMAT_RGB_666,  /** BGR..BGR, 6 bits per subpixel */
-       IA_CSS_STREAM_FORMAT_RGB_888,  /** BGR..BGR, 8 bits per subpixel */
-       IA_CSS_STREAM_FORMAT_RAW_6,    /** RAW data, 6 bits per pixel */
-       IA_CSS_STREAM_FORMAT_RAW_7,    /** RAW data, 7 bits per pixel */
-       IA_CSS_STREAM_FORMAT_RAW_8,    /** RAW data, 8 bits per pixel */
-       IA_CSS_STREAM_FORMAT_RAW_10,   /** RAW data, 10 bits per pixel */
-       IA_CSS_STREAM_FORMAT_RAW_12,   /** RAW data, 12 bits per pixel */
-       IA_CSS_STREAM_FORMAT_RAW_14,   /** RAW data, 14 bits per pixel */
-       IA_CSS_STREAM_FORMAT_RAW_16,   /** RAW data, 16 bits per pixel, which is
-                                           not specified in CSI-MIPI standard*/
-       IA_CSS_STREAM_FORMAT_BINARY_8, /** Binary byte stream, which is target at
-                                           JPEG. */
-
-       /* CSI2-MIPI specific format: Generic short packet data. It is used to
-        *  keep the timing information for the opening/closing of shutters,
-        *  triggering of flashes and etc.
-        */
-       IA_CSS_STREAM_FORMAT_GENERIC_SHORT1,  /** Generic Short Packet Code 1 */
-       IA_CSS_STREAM_FORMAT_GENERIC_SHORT2,  /** Generic Short Packet Code 2 */
-       IA_CSS_STREAM_FORMAT_GENERIC_SHORT3,  /** Generic Short Packet Code 3 */
-       IA_CSS_STREAM_FORMAT_GENERIC_SHORT4,  /** Generic Short Packet Code 4 */
-       IA_CSS_STREAM_FORMAT_GENERIC_SHORT5,  /** Generic Short Packet Code 5 */
-       IA_CSS_STREAM_FORMAT_GENERIC_SHORT6,  /** Generic Short Packet Code 6 */
-       IA_CSS_STREAM_FORMAT_GENERIC_SHORT7,  /** Generic Short Packet Code 7 */
-       IA_CSS_STREAM_FORMAT_GENERIC_SHORT8,  /** Generic Short Packet Code 8 */
-
-       /* CSI2-MIPI specific format: YUV data.
-        */
-       IA_CSS_STREAM_FORMAT_YUV420_8_SHIFT,  /** YUV420 8-bit (Chroma Shifted Pixel Sampling) */
-       IA_CSS_STREAM_FORMAT_YUV420_10_SHIFT, /** YUV420 8-bit (Chroma Shifted Pixel Sampling) */
-
-       /* CSI2-MIPI specific format: Generic long packet data
-        */
-       IA_CSS_STREAM_FORMAT_EMBEDDED, /** Embedded 8-bit non Image Data */
-
-       /* CSI2-MIPI specific format: User defined byte-based data. For example,
-        *  the data transmitter (e.g. the SoC sensor) can keep the JPEG data as
-        *  the User Defined Data Type 4 and the MPEG data as the
-        *  User Defined Data Type 7.
-        */
-       IA_CSS_STREAM_FORMAT_USER_DEF1,  /** User defined 8-bit data type 1 */
-       IA_CSS_STREAM_FORMAT_USER_DEF2,  /** User defined 8-bit data type 2 */
-       IA_CSS_STREAM_FORMAT_USER_DEF3,  /** User defined 8-bit data type 3 */
-       IA_CSS_STREAM_FORMAT_USER_DEF4,  /** User defined 8-bit data type 4 */
-       IA_CSS_STREAM_FORMAT_USER_DEF5,  /** User defined 8-bit data type 5 */
-       IA_CSS_STREAM_FORMAT_USER_DEF6,  /** User defined 8-bit data type 6 */
-       IA_CSS_STREAM_FORMAT_USER_DEF7,  /** User defined 8-bit data type 7 */
-       IA_CSS_STREAM_FORMAT_USER_DEF8,  /** User defined 8-bit data type 8 */
-};
-
-#define        IA_CSS_STREAM_FORMAT_NUM        IA_CSS_STREAM_FORMAT_USER_DEF8
+#include "../../../include/linux/atomisp_platform.h"
 
 unsigned int ia_css_util_input_format_bpp(
-       enum ia_css_stream_format format,
+       enum atomisp_input_format format,
        bool two_ppc);
 
-#endif /* __IA_CSS_STREAM_FORMAT_H */
+#endif /* __ATOMISP_INPUT_FORMAT_H */
index ca3203357ff5f2e68f36dc837781ca8b1a7f0ff6..ddefad330db7b2e0b1ddee5f5e460ae7852b77d1 100644 (file)
@@ -62,7 +62,7 @@ enum {
  */
 struct ia_css_stream_isys_stream_config {
        struct ia_css_resolution  input_res; /** Resolution of input data */
-       enum ia_css_stream_format format; /** Format of input stream. This data
+       enum atomisp_input_format format; /** Format of input stream. This data
                                               format will be mapped to MIPI data
                                               type internally. */
        int linked_isys_stream_id; /** default value is -1, other value means
@@ -77,7 +77,7 @@ struct ia_css_stream_input_config {
                                                        Used for CSS 2400/1 System and deprecated for other
                                                        systems (replaced by input_effective_res in
                                                        ia_css_pipe_config) */
-       enum ia_css_stream_format format; /** Format of input stream. This data
+       enum atomisp_input_format format; /** Format of input stream. This data
                                               format will be mapped to MIPI data
                                               type internally. */
        enum ia_css_bayer_order bayer_order; /** Bayer order for RAW streams */
@@ -257,7 +257,7 @@ ia_css_stream_unload(struct ia_css_stream *stream);
  *
  * This function will return the stream format.
  */
-enum ia_css_stream_format
+enum atomisp_input_format
 ia_css_stream_get_format(const struct ia_css_stream *stream);
 
 /* @brief Check if the stream is configured for 2 pixels per clock
@@ -453,7 +453,7 @@ ia_css_stream_send_input_line(const struct ia_css_stream *stream,
  */
 void
 ia_css_stream_send_input_embedded_line(const struct ia_css_stream *stream,
-                             enum ia_css_stream_format format,
+                             enum atomisp_input_format format,
                              const unsigned short *data,
                              unsigned int width);
 
index b99c0644ab38bb305527654f23c1721a455bc77e..675f6e539b3f26fe8a257f73e2ce7d93fb5de2ed 100644 (file)
@@ -17,7 +17,6 @@
 
 #include "ia_css_bnlm_types.h"
 #include "ia_css_bnlm_param.h"
-#include "ia_css_bnlm_default.host.h"
 
 void
 ia_css_bnlm_vmem_encode(
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.c
deleted file mode 100644 (file)
index e2eb88c..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#include "ia_css_bnlm_types.h"
-
-const struct ia_css_bnlm_config default_bnlm_config = {
-
-       .rad_enable = true,
-       .rad_x_origin = 0,
-       .rad_y_origin = 0,
-       .avg_min_th = 127,
-       .max_min_th = 2047,
-
-       .exp_coeff_a = 6048,
-       .exp_coeff_b = 7828,
-       .exp_coeff_c = 0,
-       .exp_exponent = 3,
-
-       .nl_th = {2252, 2251, 2250},
-       .match_quality_max_idx = {2, 3, 3, 1},
-
-       .mu_root_lut_thr = {
-               26, 56, 128, 216, 462, 626, 932, 1108, 1480, 1564, 1824, 1896, 2368, 3428, 4560},
-       .mu_root_lut_val = {
-               384, 320, 320, 264, 248, 240, 224, 192, 192, 160, 160, 160, 136, 130, 96, 80},
-       .sad_norm_lut_thr = {
-               236, 328, 470, 774, 964, 1486, 2294, 3244, 4844, 6524, 6524, 6524, 6524, 6524, 6524},
-       .sad_norm_lut_val = {
-               8064, 7680, 7168, 6144, 5120, 3840, 2560, 2304, 1984, 1792, 1792, 1792, 1792, 1792, 1792, 1792},
-       .sig_detail_lut_thr = {
-               2936, 3354, 3943, 4896, 5230, 5682, 5996, 7299, 7299, 7299, 7299, 7299, 7299, 7299, 7299},
-       .sig_detail_lut_val = {
-               8191, 7680, 7168, 6144, 5120, 4608, 4224, 4032, 4032, 4032, 4032, 4032, 4032, 4032, 4032, 4032},
-       .sig_rad_lut_thr = {
-               18, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20},
-       .sig_rad_lut_val = {
-               2560, 7168, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188},
-       .rad_pow_lut_thr = {
-               0, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013},
-       .rad_pow_lut_val = {
-               8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191},
-       .nl_0_lut_thr = {
-               1072, 7000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000},
-       .nl_0_lut_val = {
-               2560, 3072, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120},
-       .nl_1_lut_thr = {
-               624, 3224, 3392, 7424, 7424, 7424, 7424, 7424, 7424, 7424, 7424, 7424, 7424, 7424, 7424},
-       .nl_1_lut_val = {
-               3584, 4608, 5120, 6144, 6144, 6144, 6144, 6144, 6144, 6144, 6144, 6144, 6144, 6144, 6144, 6144},
-       .nl_2_lut_thr = {
-               745, 2896, 3720, 6535, 7696, 8040, 8040, 8040, 8040, 8040, 8040, 8040, 8040, 8040, 8040},
-       .nl_2_lut_val = {
-               3584, 4608, 6144, 7168, 7936, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191},
-       .nl_3_lut_thr = {
-               4848, 4984, 5872, 6000, 6517, 6960, 7944, 8088, 8161, 8161, 8161, 8161, 8161, 8161, 8161},
-       .nl_3_lut_val = {
-               3072, 4104, 4608, 5120, 6144, 7168, 7680, 8128, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191},
-
-};
-
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.h
deleted file mode 100644 (file)
index f18c807..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#ifndef __IA_CSS_BNLM_DEFAULT_HOST_H
-#define __IA_CSS_BNLM_DEFAULT_HOST_H
-
-#include "ia_css_bnlm_types.h"
-extern const struct ia_css_bnlm_config default_bnlm_config;
-
-#endif /* __IA_CSS_BNLM_DEFAULT_HOST_H */
-
index 641564b4af8e59c393d4969d3657d049c2caa86a..38d10a5237c6e7756d7b399447cc1ee9e150857e 100644 (file)
@@ -17,7 +17,6 @@
 
 #include "ia_css_dpc2_types.h"
 #include "ia_css_dpc2_param.h"
-#include "ia_css_dpc2_default.host.h"
 
 void
 ia_css_dpc2_encode(
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.c
deleted file mode 100644 (file)
index c102601..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#include "ia_css_dpc2_types.h"
-
-const struct ia_css_dpc2_config default_dpc2_config = {
-       .metric1 = 1638,
-       .metric2 =  128,
-       .metric3 = 1638,
-       .wb_gain_gr = 512,
-       .wb_gain_r  = 512,
-       .wb_gain_b  = 512,
-       .wb_gain_gb = 512
-};
-
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.h
deleted file mode 100644 (file)
index a1527ce..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#ifndef __IA_CSS_DPC2_DEFAULT_HOST_H
-#define __IA_CSS_DPC2_DEFAULT_HOST_H
-
-#include "ia_css_dpc2_types.h"
-
-extern const struct ia_css_dpc2_config default_dpc2_config;
-
-#endif /* __IA_CSS_DPC2_DEFAULT_HOST_H */
-
index 355ff13273b0465196b22417777b24b77060dd93..fff932c1364e2cb331a37910b981b5ccd2afdf4c 100644 (file)
@@ -17,7 +17,6 @@
 
 #include "ia_css_eed1_8_types.h"
 #include "ia_css_eed1_8_param.h"
-#include "ia_css_eed1_8_default.host.h"
 
 void
 ia_css_eed1_8_vmem_encode(
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.c
deleted file mode 100644 (file)
index 3622719..0000000
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#include "ia_css_eed1_8_types.h"
-
-/* The default values for the kernel parameters are based on
- * ISP261 CSS API public parameter list_all.xlsx from 12-09-2014
- * The parameter list is available on the ISP261 sharepoint
- */
-
-/* Default kernel parameters. */
-const struct ia_css_eed1_8_config default_eed1_8_config = {
-       .rbzp_strength = 5489,
-       .fcstrength = 6554,
-       .fcthres_0 = 0,
-       .fcthres_1 = 0,
-       .fc_sat_coef = 8191,
-       .fc_coring_prm = 128,
-       .aerel_thres0 = 0,
-       .aerel_gain0 = 8191,
-       .aerel_thres1 = 16,
-       .aerel_gain1 = 20,
-       .derel_thres0 = 1229,
-       .derel_gain0 = 1,
-       .derel_thres1 = 819,
-       .derel_gain1 = 1,
-       .coring_pos0 = 0,
-       .coring_pos1 = 0,
-       .coring_neg0 = 0,
-       .coring_neg1 = 0,
-       .gain_exp = 2,
-       .gain_pos0 = 6144,
-       .gain_pos1 = 2048,
-       .gain_neg0 = 2048,
-       .gain_neg1 = 6144,
-       .pos_margin0 = 1475,
-       .pos_margin1 = 1475,
-       .neg_margin0 = 1475,
-       .neg_margin1 = 1475,
-       .dew_enhance_seg_x = {
-               0,
-               64,
-               272,
-               688,
-               1376,
-               2400,
-               3840,
-               5744,
-               8191
-               },
-       .dew_enhance_seg_y = {
-               0,
-               144,
-               480,
-               1040,
-               1852,
-               2945,
-               4357,
-               6094,
-               8191
-               },
-       .dew_enhance_seg_slope = {
-               4608,
-               3308,
-               2757,
-               2417,
-               2186,
-               8033,
-               7473,
-               7020
-               },
-       .dew_enhance_seg_exp = {
-               2,
-               2,
-               2,
-               2,
-               2,
-               0,
-               0,
-               0
-               },
-       .dedgew_max = 6144
-};
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.h
deleted file mode 100644 (file)
index 782f739..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#ifndef __IA_CSS_EED1_8_DEFAULT_HOST_H
-#define __IA_CSS_EED1_8_DEFAULT_HOST_H
-
-#include "ia_css_eed1_8_types.h"
-
-extern const struct ia_css_eed1_8_config default_eed1_8_config;
-
-#endif /* __IA_CSS_EED1_8_DEFAULT_HOST_H */
index 8fdf47c9310c8e69b61852f1c94063932338456a..9efe5e5e4e06bea0eb72768eaa4ee2e13ad38fdc 100644 (file)
@@ -60,7 +60,7 @@ ia_css_output_config(
        (void)size;
        ia_css_dma_configure_from_info(&to->port_b, from->info);
        to->width_a_over_b = elems_a / to->port_b.elems;
-       to->height = from->info->res.height;
+       to->height = from->info ? from->info->res.height : 0;
        to->enable = from->info != NULL;
        ia_css_frame_info_to_frame_sp_info(&to->info, from->info);
 
index 68a27f0cfba0845172c0c4ebb0e8d7d507d3425f..fa9ce0fedf234b3d03ef3ad0c4997493b441a99b 100644 (file)
@@ -37,34 +37,34 @@ sh_css_elems_bytes_from_info (unsigned raw_bit_depth)
 
 /* MW: These areMIPI / ISYS properties, not camera function properties */
 static enum sh_stream_format
-css2isp_stream_format(enum ia_css_stream_format from)
+css2isp_stream_format(enum atomisp_input_format from)
 {
        switch (from) {
-       case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
+       case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
                return sh_stream_format_yuv420_legacy;
-       case IA_CSS_STREAM_FORMAT_YUV420_8:
-       case IA_CSS_STREAM_FORMAT_YUV420_10:
-       case IA_CSS_STREAM_FORMAT_YUV420_16:
+       case ATOMISP_INPUT_FORMAT_YUV420_8:
+       case ATOMISP_INPUT_FORMAT_YUV420_10:
+       case ATOMISP_INPUT_FORMAT_YUV420_16:
                return sh_stream_format_yuv420;
-       case IA_CSS_STREAM_FORMAT_YUV422_8:
-       case IA_CSS_STREAM_FORMAT_YUV422_10:
-       case IA_CSS_STREAM_FORMAT_YUV422_16:
+       case ATOMISP_INPUT_FORMAT_YUV422_8:
+       case ATOMISP_INPUT_FORMAT_YUV422_10:
+       case ATOMISP_INPUT_FORMAT_YUV422_16:
                return sh_stream_format_yuv422;
-       case IA_CSS_STREAM_FORMAT_RGB_444:
-       case IA_CSS_STREAM_FORMAT_RGB_555:
-       case IA_CSS_STREAM_FORMAT_RGB_565:
-       case IA_CSS_STREAM_FORMAT_RGB_666:
-       case IA_CSS_STREAM_FORMAT_RGB_888:
+       case ATOMISP_INPUT_FORMAT_RGB_444:
+       case ATOMISP_INPUT_FORMAT_RGB_555:
+       case ATOMISP_INPUT_FORMAT_RGB_565:
+       case ATOMISP_INPUT_FORMAT_RGB_666:
+       case ATOMISP_INPUT_FORMAT_RGB_888:
                return sh_stream_format_rgb;
-       case IA_CSS_STREAM_FORMAT_RAW_6:
-       case IA_CSS_STREAM_FORMAT_RAW_7:
-       case IA_CSS_STREAM_FORMAT_RAW_8:
-       case IA_CSS_STREAM_FORMAT_RAW_10:
-       case IA_CSS_STREAM_FORMAT_RAW_12:
-       case IA_CSS_STREAM_FORMAT_RAW_14:
-       case IA_CSS_STREAM_FORMAT_RAW_16:
+       case ATOMISP_INPUT_FORMAT_RAW_6:
+       case ATOMISP_INPUT_FORMAT_RAW_7:
+       case ATOMISP_INPUT_FORMAT_RAW_8:
+       case ATOMISP_INPUT_FORMAT_RAW_10:
+       case ATOMISP_INPUT_FORMAT_RAW_12:
+       case ATOMISP_INPUT_FORMAT_RAW_14:
+       case ATOMISP_INPUT_FORMAT_RAW_16:
                return sh_stream_format_raw;
-       case IA_CSS_STREAM_FORMAT_BINARY_8:
+       case ATOMISP_INPUT_FORMAT_BINARY_8:
        default:
                return sh_stream_format_raw;
        }
index 5c0b8febd79aeac36909f87dddba98e45c8e4be6..ae868eb5e10f7e13e0fa840ade1a3c9dd46d4895 100644 (file)
@@ -28,7 +28,7 @@ struct ia_css_raw_configuration {
        const struct ia_css_frame_info  *in_info;
        const struct ia_css_frame_info  *internal_info;
        bool two_ppc;
-       enum ia_css_stream_format stream_format;
+       enum atomisp_input_format stream_format;
        bool deinterleaved;
        uint8_t enable_left_padding;
 };
index e775af51c0c055da77d5496f6dec7e2b57dbdb48..78a113bfe8f1cf22cd0effe8fc26789bfd77290e 100644 (file)
@@ -15,7 +15,7 @@
 #include "ia_css_debug.h"
 #include "ia_css_tdf.host.h"
 
-const int16_t g_pyramid[8][8] = {
+static const int16_t g_pyramid[8][8] = {
 {128, 384, 640, 896, 896, 640, 384, 128},
 {384, 1152, 1920, 2688, 2688, 1920, 1152, 384},
 {640, 1920, 3200, 4480, 4480, 3200, 1920, 640},
index 1b3e759e41a35ca304cf3e0c99da4809aba19b26..bd628a18e839edf4aca8bfdfd88997823e35bc6b 100644 (file)
@@ -17,7 +17,6 @@
 
 #include "ia_css_tdf_types.h"
 #include "ia_css_tdf_param.h"
-#include "ia_css_tdf_default.host.h"
 
 void
 ia_css_tdf_vmem_encode(
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.c
deleted file mode 100644 (file)
index 9bb42da..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#include "ia_css_tdf_types.h"
-
-const struct ia_css_tdf_config default_tdf_config = {
-       .thres_flat_table = {0},
-       .thres_detail_table = {0},
-       .epsilon_0 = 4095,
-       .epsilon_1 = 5733,
-       .eps_scale_text = 409,
-       .eps_scale_edge = 3686,
-       .sepa_flat = 1294,
-       .sepa_edge = 4095,
-       .blend_flat = 819,
-       .blend_text = 819,
-       .blend_edge = 8191,
-       .shading_gain = 1024,
-       .shading_base_gain = 8191,
-       .local_y_gain = 0,
-       .local_y_base_gain = 2047,
-       .rad_x_origin = 0,
-       .rad_y_origin = 0
-};
-
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.h
deleted file mode 100644 (file)
index cd8fb70..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#ifndef __IA_CSS_TDF_DEFAULT_HOST_H
-#define __IA_CSS_TDF_DEFAULT_HOST_H
-
-#include "ia_css_tdf_types.h"
-
-extern const struct ia_css_tdf_config default_tdf_config;
-
-#endif /* __IA_CSS_TDF_DEFAULT_HOST_H */
-
index 5610833ed595d242f7311b428c4cd7c8820db4de..c2076e4124101d93386654789f8358c75f82fde8 100644 (file)
@@ -130,11 +130,11 @@ ia_css_vf_configure(
 
        err = configure_kernel(info, out_info, vf_info, downscale_log2, &config);
        configure_dma(&config, vf_info);
-       if (binary) {
-               if (vf_info)
-                       vf_info->raw_bit_depth = info->dma.vfdec_bits_per_pixel;
-               ia_css_configure_vf (binary, &config);
-       }
+
+       if (vf_info)
+               vf_info->raw_bit_depth = info->dma.vfdec_bits_per_pixel;
+       ia_css_configure_vf (binary, &config);
+
        return IA_CSS_SUCCESS;
 }
 
index 732e49a241ebe67206cf83eee0c4a5b7911edb16..b62c4d321a4ec65cf9165c2e2e6ff39e7687fb81 100644 (file)
@@ -113,7 +113,7 @@ struct ia_css_binary_descr {
 #endif
        bool enable_capture_pp_bli;
        struct ia_css_resolution dvs_env;
-       enum ia_css_stream_format stream_format;
+       enum atomisp_input_format stream_format;
        struct ia_css_frame_info *in_info;              /* the info of the input-frame with the
                                                           ISP required resolution. */
        struct ia_css_frame_info *bds_out_info;
@@ -126,7 +126,7 @@ struct ia_css_binary_descr {
 
 struct ia_css_binary {
        const struct ia_css_binary_xinfo *info;
-       enum ia_css_stream_format input_format;
+       enum atomisp_input_format input_format;
        struct ia_css_frame_info in_frame_info;
        struct ia_css_frame_info internal_frame_info;
        struct ia_css_frame_info out_frame_info[IA_CSS_BINARY_MAX_OUTPUT_PORTS];
@@ -162,7 +162,7 @@ struct ia_css_binary {
 
 #define IA_CSS_BINARY_DEFAULT_SETTINGS \
 (struct ia_css_binary) { \
-       .input_format           = IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY, \
+       .input_format           = ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY, \
        .in_frame_info          = IA_CSS_BINARY_DEFAULT_FRAME_INFO, \
        .internal_frame_info    = IA_CSS_BINARY_DEFAULT_FRAME_INFO, \
        .out_frame_info         = {IA_CSS_BINARY_DEFAULT_FRAME_INFO}, \
@@ -179,7 +179,7 @@ enum ia_css_err
 ia_css_binary_fill_info(const struct ia_css_binary_xinfo *xinfo,
                 bool online,
                 bool two_ppc,
-                enum ia_css_stream_format stream_format,
+                enum atomisp_input_format stream_format,
                 const struct ia_css_frame_info *in_info,
                 const struct ia_css_frame_info *bds_out_info,
                 const struct ia_css_frame_info *out_info[],
index a0f0e9062c4ccefeb80bb68049db14f5ebc2446c..0cd6e1da43cfe7ae0f0a20a4feaf9e718ec39b72 100644 (file)
@@ -861,7 +861,7 @@ binary_supports_output_format(const struct ia_css_binary_xinfo *info,
 #ifdef ISP2401
 static bool
 binary_supports_input_format(const struct ia_css_binary_xinfo *info,
-                            enum ia_css_stream_format format)
+                            enum atomisp_input_format format)
 {
 
        assert(info != NULL);
@@ -1088,7 +1088,7 @@ enum ia_css_err
 ia_css_binary_fill_info(const struct ia_css_binary_xinfo *xinfo,
                 bool online,
                 bool two_ppc,
-                enum ia_css_stream_format stream_format,
+                enum atomisp_input_format stream_format,
                 const struct ia_css_frame_info *in_info, /* can be NULL */
                 const struct ia_css_frame_info *bds_out_info, /* can be NULL */
                 const struct ia_css_frame_info *out_info[], /* can be NULL */
@@ -1382,7 +1382,7 @@ ia_css_binary_find(struct ia_css_binary_descr *descr,
        int mode;
        bool online;
        bool two_ppc;
-       enum ia_css_stream_format stream_format;
+       enum atomisp_input_format stream_format;
        const struct ia_css_frame_info *req_in_info,
                                       *req_bds_out_info,
                                       *req_out_info[IA_CSS_BINARY_MAX_OUTPUT_PORTS],
index e50d9f2e2609e408dca2c90519c6806243c6aa8f..ffbcdd80d934374eb65b064f90a6472a1a8ea600 100644 (file)
@@ -90,12 +90,11 @@ struct sh_css_queues {
 
 #endif
 
-struct sh_css_queues  css_queues;
-
-
 /*******************************************************
 *** Static variables
 ********************************************************/
+static struct sh_css_queues css_queues;
+
 static int buffer_type_to_queue_id_map[SH_CSS_MAX_SP_THREADS][IA_CSS_NUM_DYNAMIC_BUFFER_TYPE];
 static bool queue_availability[SH_CSS_MAX_SP_THREADS][SH_CSS_MAX_NUM_QUEUES];
 
@@ -207,7 +206,7 @@ static void map_buffer_type_to_queue_id(
        }
 
        for (i = SH_CSS_QUEUE_C_ID; i < SH_CSS_MAX_NUM_QUEUES; i++) {
-               if (queue_availability[thread_id][i] == true) {
+               if (queue_availability[thread_id][i]) {
                        queue_availability[thread_id][i] = false;
                        buffer_type_to_queue_id_map[thread_id][buf_type] = i;
                        break;
@@ -266,7 +265,7 @@ static ia_css_queue_t *bufq_get_qhandle(
        case sh_css_sp2host_isys_event_queue:
                q = &css_queues.sp2host_isys_event_queue_handle;
                break;
-#endif         
+#endif
        case sh_css_host2sp_tag_cmd_queue:
                q = &css_queues.host2sp_tag_cmd_queue_handle;
                break;
index 60395904f89a8bca1edfc176dda8a09f67909f39..4607a76dc78af76cad83daab4546aa2592dbdfc7 100644 (file)
 /* Global variable to store the dtrace verbosity level */
 unsigned int ia_css_debug_trace_level = IA_CSS_DEBUG_WARNING;
 
-/* Assumes that IA_CSS_STREAM_FORMAT_BINARY_8 is last */
-#define N_IA_CSS_STREAM_FORMAT (IA_CSS_STREAM_FORMAT_BINARY_8+1)
-
 #define DPG_START "ia_css_debug_pipe_graph_dump_start "
 #define DPG_END   " ia_css_debug_pipe_graph_dump_end\n"
 
@@ -141,8 +138,8 @@ static struct pipe_graph_class {
        int width;
        int eff_height;
        int eff_width;
-       enum ia_css_stream_format stream_format;
-} pg_inst = {true, 0, 0, 0, 0, N_IA_CSS_STREAM_FORMAT};
+       enum atomisp_input_format stream_format;
+} pg_inst = {true, 0, 0, 0, 0, N_ATOMISP_INPUT_FORMAT};
 
 static const char * const queue_id_to_str[] = {
        /* [SH_CSS_QUEUE_A_ID]     =*/ "queue_A",
@@ -261,86 +258,86 @@ unsigned int ia_css_debug_get_dtrace_level(void)
        return ia_css_debug_trace_level;
 }
 
-static const char *debug_stream_format2str(const enum ia_css_stream_format stream_format)
+static const char *debug_stream_format2str(const enum atomisp_input_format stream_format)
 {
        switch (stream_format) {
-       case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
+       case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
                return "yuv420-8-legacy";
-       case IA_CSS_STREAM_FORMAT_YUV420_8:
+       case ATOMISP_INPUT_FORMAT_YUV420_8:
                return "yuv420-8";
-       case IA_CSS_STREAM_FORMAT_YUV420_10:
+       case ATOMISP_INPUT_FORMAT_YUV420_10:
                return "yuv420-10";
-       case IA_CSS_STREAM_FORMAT_YUV420_16:
+       case ATOMISP_INPUT_FORMAT_YUV420_16:
                return "yuv420-16";
-       case IA_CSS_STREAM_FORMAT_YUV422_8:
+       case ATOMISP_INPUT_FORMAT_YUV422_8:
                return "yuv422-8";
-       case IA_CSS_STREAM_FORMAT_YUV422_10:
+       case ATOMISP_INPUT_FORMAT_YUV422_10:
                return "yuv422-10";
-       case IA_CSS_STREAM_FORMAT_YUV422_16:
+       case ATOMISP_INPUT_FORMAT_YUV422_16:
                return "yuv422-16";
-       case IA_CSS_STREAM_FORMAT_RGB_444:
+       case ATOMISP_INPUT_FORMAT_RGB_444:
                return "rgb444";
-       case IA_CSS_STREAM_FORMAT_RGB_555:
+       case ATOMISP_INPUT_FORMAT_RGB_555:
                return "rgb555";
-       case IA_CSS_STREAM_FORMAT_RGB_565:
+       case ATOMISP_INPUT_FORMAT_RGB_565:
                return "rgb565";
-       case IA_CSS_STREAM_FORMAT_RGB_666:
+       case ATOMISP_INPUT_FORMAT_RGB_666:
                return "rgb666";
-       case IA_CSS_STREAM_FORMAT_RGB_888:
+       case ATOMISP_INPUT_FORMAT_RGB_888:
                return "rgb888";
-       case IA_CSS_STREAM_FORMAT_RAW_6:
+       case ATOMISP_INPUT_FORMAT_RAW_6:
                return "raw6";
-       case IA_CSS_STREAM_FORMAT_RAW_7:
+       case ATOMISP_INPUT_FORMAT_RAW_7:
                return "raw7";
-       case IA_CSS_STREAM_FORMAT_RAW_8:
+       case ATOMISP_INPUT_FORMAT_RAW_8:
                return "raw8";
-       case IA_CSS_STREAM_FORMAT_RAW_10:
+       case ATOMISP_INPUT_FORMAT_RAW_10:
                return "raw10";
-       case IA_CSS_STREAM_FORMAT_RAW_12:
+       case ATOMISP_INPUT_FORMAT_RAW_12:
                return "raw12";
-       case IA_CSS_STREAM_FORMAT_RAW_14:
+       case ATOMISP_INPUT_FORMAT_RAW_14:
                return "raw14";
-       case IA_CSS_STREAM_FORMAT_RAW_16:
+       case ATOMISP_INPUT_FORMAT_RAW_16:
                return "raw16";
-       case IA_CSS_STREAM_FORMAT_BINARY_8:
+       case ATOMISP_INPUT_FORMAT_BINARY_8:
                return "binary8";
-       case IA_CSS_STREAM_FORMAT_GENERIC_SHORT1:
+       case ATOMISP_INPUT_FORMAT_GENERIC_SHORT1:
                return "generic-short1";
-       case IA_CSS_STREAM_FORMAT_GENERIC_SHORT2:
+       case ATOMISP_INPUT_FORMAT_GENERIC_SHORT2:
                return "generic-short2";
-       case IA_CSS_STREAM_FORMAT_GENERIC_SHORT3:
+       case ATOMISP_INPUT_FORMAT_GENERIC_SHORT3:
                return "generic-short3";
-       case IA_CSS_STREAM_FORMAT_GENERIC_SHORT4:
+       case ATOMISP_INPUT_FORMAT_GENERIC_SHORT4:
                return "generic-short4";
-       case IA_CSS_STREAM_FORMAT_GENERIC_SHORT5:
+       case ATOMISP_INPUT_FORMAT_GENERIC_SHORT5:
                return "generic-short5";
-       case IA_CSS_STREAM_FORMAT_GENERIC_SHORT6:
+       case ATOMISP_INPUT_FORMAT_GENERIC_SHORT6:
                return "generic-short6";
-       case IA_CSS_STREAM_FORMAT_GENERIC_SHORT7:
+       case ATOMISP_INPUT_FORMAT_GENERIC_SHORT7:
                return "generic-short7";
-       case IA_CSS_STREAM_FORMAT_GENERIC_SHORT8:
+       case ATOMISP_INPUT_FORMAT_GENERIC_SHORT8:
                return "generic-short8";
-       case IA_CSS_STREAM_FORMAT_YUV420_8_SHIFT:
+       case ATOMISP_INPUT_FORMAT_YUV420_8_SHIFT:
                return "yuv420-8-shift";
-       case IA_CSS_STREAM_FORMAT_YUV420_10_SHIFT:
+       case ATOMISP_INPUT_FORMAT_YUV420_10_SHIFT:
                return "yuv420-10-shift";
-       case IA_CSS_STREAM_FORMAT_EMBEDDED:
+       case ATOMISP_INPUT_FORMAT_EMBEDDED:
                return "embedded-8";
-       case IA_CSS_STREAM_FORMAT_USER_DEF1:
+       case ATOMISP_INPUT_FORMAT_USER_DEF1:
                return "user-def-8-type-1";
-       case IA_CSS_STREAM_FORMAT_USER_DEF2:
+       case ATOMISP_INPUT_FORMAT_USER_DEF2:
                return "user-def-8-type-2";
-       case IA_CSS_STREAM_FORMAT_USER_DEF3:
+       case ATOMISP_INPUT_FORMAT_USER_DEF3:
                return "user-def-8-type-3";
-       case IA_CSS_STREAM_FORMAT_USER_DEF4:
+       case ATOMISP_INPUT_FORMAT_USER_DEF4:
                return "user-def-8-type-4";
-       case IA_CSS_STREAM_FORMAT_USER_DEF5:
+       case ATOMISP_INPUT_FORMAT_USER_DEF5:
                return "user-def-8-type-5";
-       case IA_CSS_STREAM_FORMAT_USER_DEF6:
+       case ATOMISP_INPUT_FORMAT_USER_DEF6:
                return "user-def-8-type-6";
-       case IA_CSS_STREAM_FORMAT_USER_DEF7:
+       case ATOMISP_INPUT_FORMAT_USER_DEF7:
                return "user-def-8-type-7";
-       case IA_CSS_STREAM_FORMAT_USER_DEF8:
+       case ATOMISP_INPUT_FORMAT_USER_DEF8:
                return "user-def-8-type-8";
 
        default:
@@ -2679,9 +2676,9 @@ ia_css_debug_pipe_graph_dump_frame(
        }
        dtrace_dot(
                "node [shape = box, "
-               "fixedsize=true, width=2, height=0.7]; \"0x%08lx\" "
+               "fixedsize=true, width=2, height=0.7]; \"%p\" "
                "[label = \"%s\\n%d(%d) x %d, %dbpp\\n%s\"];",
-               HOST_ADDRESS(frame),
+               frame,
                debug_frame_format2str(frame->info.format),
                frame->info.res.width,
                frame->info.padded_width,
@@ -2691,16 +2688,16 @@ ia_css_debug_pipe_graph_dump_frame(
 
        if (in_frame) {
                dtrace_dot(
-                       "\"0x%08lx\"->\"%s(pipe%d)\" "
+                       "\"%p\"->\"%s(pipe%d)\" "
                        "[label = %s_frame];",
-                       HOST_ADDRESS(frame),
+                       frame,
                        blob_name, id, frame_name);
        } else {
                dtrace_dot(
-                       "\"%s(pipe%d)\"->\"0x%08lx\" "
+                       "\"%s(pipe%d)\"->\"%p\" "
                        "[label = %s_frame];",
                        blob_name, id,
-                       HOST_ADDRESS(frame),
+                       frame,
                        frame_name);
        }
 }
@@ -2730,7 +2727,7 @@ void ia_css_debug_pipe_graph_dump_epilogue(void)
        }
 
 
-       if (pg_inst.stream_format != N_IA_CSS_STREAM_FORMAT) {
+       if (pg_inst.stream_format != N_ATOMISP_INPUT_FORMAT) {
                /* An input stream format has been set so assume we have
                 * an input system and sensor
                 */
@@ -2770,7 +2767,7 @@ void ia_css_debug_pipe_graph_dump_epilogue(void)
        pg_inst.height = 0;
        pg_inst.eff_width = 0;
        pg_inst.eff_height = 0;
-       pg_inst.stream_format = N_IA_CSS_STREAM_FORMAT;
+       pg_inst.stream_format = N_ATOMISP_INPUT_FORMAT;
 }
 
 void
@@ -3011,9 +3008,9 @@ ia_css_debug_pipe_graph_dump_sp_raw_copy(
 
        snprintf(ring_buffer, sizeof(ring_buffer),
                "node [shape = box, "
-               "fixedsize=true, width=2, height=0.7]; \"0x%08lx\" "
+               "fixedsize=true, width=2, height=0.7]; \"%p\" "
                "[label = \"%s\\n%d(%d) x %d\\nRingbuffer\"];",
-               HOST_ADDRESS(out_frame),
+               out_frame,
                debug_frame_format2str(out_frame->info.format),
                out_frame->info.res.width,
                out_frame->info.padded_width,
@@ -3022,9 +3019,9 @@ ia_css_debug_pipe_graph_dump_sp_raw_copy(
        dtrace_dot(ring_buffer);
 
        dtrace_dot(
-               "\"%s(pipe%d)\"->\"0x%08lx\" "
+               "\"%s(pipe%d)\"->\"%p\" "
                "[label = out_frame];",
-               "sp_raw_copy", 1, HOST_ADDRESS(out_frame));
+               "sp_raw_copy", 1, out_frame);
 
        snprintf(dot_id_input_bin, sizeof(dot_id_input_bin), "%s(pipe%d)", "sp_raw_copy", 1);
 }
index adefa57820a4ac46a59467ed6e2c06792161af98..1bed027435fd01588efe9a12736d0c6afeb74ccd 100644 (file)
@@ -112,13 +112,13 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
            width_b_factor = 1, start_column_b,
            left_padding = 0;
        input_formatter_cfg_t if_a_config, if_b_config;
-       enum ia_css_stream_format input_format;
+       enum atomisp_input_format input_format;
        enum ia_css_err err = IA_CSS_SUCCESS;
        uint8_t if_config_index;
 
        /* Determine which input formatter config set is targeted. */
        /* Index is equal to the CSI-2 port used. */
-       enum ia_css_csi2_port port;
+       enum mipi_port_id port;
 
        if (binary) {
                cropped_height = binary->in_frame_info.res.height;
@@ -141,7 +141,7 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
        if (config->mode == IA_CSS_INPUT_MODE_SENSOR
            || config->mode == IA_CSS_INPUT_MODE_BUFFERED_SENSOR) {
                port = config->source.port.port;
-               if_config_index = (uint8_t) (port - IA_CSS_CSI2_PORT0);
+               if_config_index = (uint8_t) (port - MIPI_PORT0_ID);
        } else if (config->mode == IA_CSS_INPUT_MODE_MEMORY) {
                if_config_index = SH_CSS_IF_CONFIG_NOT_NEEDED;
        } else {
@@ -189,7 +189,7 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
        bits_per_pixel = input_formatter_get_alignment(INPUT_FORMATTER0_ID)
            * 8 / ISP_VEC_NELEMS;
        switch (input_format) {
-       case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
+       case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
                if (two_ppc) {
                        vmem_increment = 1;
                        deinterleaving = 1;
@@ -219,9 +219,9 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
                        start_column = start_column * deinterleaving / 2;
                }
                break;
-       case IA_CSS_STREAM_FORMAT_YUV420_8:
-       case IA_CSS_STREAM_FORMAT_YUV420_10:
-       case IA_CSS_STREAM_FORMAT_YUV420_16:
+       case ATOMISP_INPUT_FORMAT_YUV420_8:
+       case ATOMISP_INPUT_FORMAT_YUV420_10:
+       case ATOMISP_INPUT_FORMAT_YUV420_16:
                if (two_ppc) {
                        vmem_increment = 1;
                        deinterleaving = 1;
@@ -246,9 +246,9 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
                        start_column *= deinterleaving;
                }
                break;
-       case IA_CSS_STREAM_FORMAT_YUV422_8:
-       case IA_CSS_STREAM_FORMAT_YUV422_10:
-       case IA_CSS_STREAM_FORMAT_YUV422_16:
+       case ATOMISP_INPUT_FORMAT_YUV422_8:
+       case ATOMISP_INPUT_FORMAT_YUV422_10:
+       case ATOMISP_INPUT_FORMAT_YUV422_16:
                if (two_ppc) {
                        vmem_increment = 1;
                        deinterleaving = 1;
@@ -267,11 +267,11 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
                        start_column *= deinterleaving;
                }
                break;
-       case IA_CSS_STREAM_FORMAT_RGB_444:
-       case IA_CSS_STREAM_FORMAT_RGB_555:
-       case IA_CSS_STREAM_FORMAT_RGB_565:
-       case IA_CSS_STREAM_FORMAT_RGB_666:
-       case IA_CSS_STREAM_FORMAT_RGB_888:
+       case ATOMISP_INPUT_FORMAT_RGB_444:
+       case ATOMISP_INPUT_FORMAT_RGB_555:
+       case ATOMISP_INPUT_FORMAT_RGB_565:
+       case ATOMISP_INPUT_FORMAT_RGB_666:
+       case ATOMISP_INPUT_FORMAT_RGB_888:
                num_vectors *= 2;
                if (two_ppc) {
                        deinterleaving = 2;     /* BR in if_a, G in if_b */
@@ -293,11 +293,11 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
                num_vectors = num_vectors / 2 * deinterleaving;
                buf_offset_b = buffer_width / 2 / ISP_VEC_NELEMS;
                break;
-       case IA_CSS_STREAM_FORMAT_RAW_6:
-       case IA_CSS_STREAM_FORMAT_RAW_7:
-       case IA_CSS_STREAM_FORMAT_RAW_8:
-       case IA_CSS_STREAM_FORMAT_RAW_10:
-       case IA_CSS_STREAM_FORMAT_RAW_12:
+       case ATOMISP_INPUT_FORMAT_RAW_6:
+       case ATOMISP_INPUT_FORMAT_RAW_7:
+       case ATOMISP_INPUT_FORMAT_RAW_8:
+       case ATOMISP_INPUT_FORMAT_RAW_10:
+       case ATOMISP_INPUT_FORMAT_RAW_12:
                if (two_ppc) {
                        int crop_col = (start_column % 2) == 1;
                        vmem_increment = 2;
@@ -332,8 +332,8 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
                vectors_per_line = CEIL_DIV(cropped_width, ISP_VEC_NELEMS);
                vectors_per_line = CEIL_MUL(vectors_per_line, deinterleaving);
                break;
-       case IA_CSS_STREAM_FORMAT_RAW_14:
-       case IA_CSS_STREAM_FORMAT_RAW_16:
+       case ATOMISP_INPUT_FORMAT_RAW_14:
+       case ATOMISP_INPUT_FORMAT_RAW_16:
                if (two_ppc) {
                        num_vectors *= 2;
                        vmem_increment = 1;
@@ -350,26 +350,26 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
                }
                buffer_height *= 2;
                break;
-       case IA_CSS_STREAM_FORMAT_BINARY_8:
-       case IA_CSS_STREAM_FORMAT_GENERIC_SHORT1:
-       case IA_CSS_STREAM_FORMAT_GENERIC_SHORT2:
-       case IA_CSS_STREAM_FORMAT_GENERIC_SHORT3:
-       case IA_CSS_STREAM_FORMAT_GENERIC_SHORT4:
-       case IA_CSS_STREAM_FORMAT_GENERIC_SHORT5:
-       case IA_CSS_STREAM_FORMAT_GENERIC_SHORT6:
-       case IA_CSS_STREAM_FORMAT_GENERIC_SHORT7:
-       case IA_CSS_STREAM_FORMAT_GENERIC_SHORT8:
-       case IA_CSS_STREAM_FORMAT_YUV420_8_SHIFT:
-       case IA_CSS_STREAM_FORMAT_YUV420_10_SHIFT:
-       case IA_CSS_STREAM_FORMAT_EMBEDDED:
-       case IA_CSS_STREAM_FORMAT_USER_DEF1:
-       case IA_CSS_STREAM_FORMAT_USER_DEF2:
-       case IA_CSS_STREAM_FORMAT_USER_DEF3:
-       case IA_CSS_STREAM_FORMAT_USER_DEF4:
-       case IA_CSS_STREAM_FORMAT_USER_DEF5:
-       case IA_CSS_STREAM_FORMAT_USER_DEF6:
-       case IA_CSS_STREAM_FORMAT_USER_DEF7:
-       case IA_CSS_STREAM_FORMAT_USER_DEF8:
+       case ATOMISP_INPUT_FORMAT_BINARY_8:
+       case ATOMISP_INPUT_FORMAT_GENERIC_SHORT1:
+       case ATOMISP_INPUT_FORMAT_GENERIC_SHORT2:
+       case ATOMISP_INPUT_FORMAT_GENERIC_SHORT3:
+       case ATOMISP_INPUT_FORMAT_GENERIC_SHORT4:
+       case ATOMISP_INPUT_FORMAT_GENERIC_SHORT5:
+       case ATOMISP_INPUT_FORMAT_GENERIC_SHORT6:
+       case ATOMISP_INPUT_FORMAT_GENERIC_SHORT7:
+       case ATOMISP_INPUT_FORMAT_GENERIC_SHORT8:
+       case ATOMISP_INPUT_FORMAT_YUV420_8_SHIFT:
+       case ATOMISP_INPUT_FORMAT_YUV420_10_SHIFT:
+       case ATOMISP_INPUT_FORMAT_EMBEDDED:
+       case ATOMISP_INPUT_FORMAT_USER_DEF1:
+       case ATOMISP_INPUT_FORMAT_USER_DEF2:
+       case ATOMISP_INPUT_FORMAT_USER_DEF3:
+       case ATOMISP_INPUT_FORMAT_USER_DEF4:
+       case ATOMISP_INPUT_FORMAT_USER_DEF5:
+       case ATOMISP_INPUT_FORMAT_USER_DEF6:
+       case ATOMISP_INPUT_FORMAT_USER_DEF7:
+       case ATOMISP_INPUT_FORMAT_USER_DEF8:
                break;
        }
        if (width_a == 0)
@@ -420,9 +420,9 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
        if_a_config.buf_eol_offset =
            buffer_width * bits_per_pixel / 8 - line_width;
        if_a_config.is_yuv420_format =
-           (input_format == IA_CSS_STREAM_FORMAT_YUV420_8)
-           || (input_format == IA_CSS_STREAM_FORMAT_YUV420_10)
-           || (input_format == IA_CSS_STREAM_FORMAT_YUV420_16);
+           (input_format == ATOMISP_INPUT_FORMAT_YUV420_8)
+           || (input_format == ATOMISP_INPUT_FORMAT_YUV420_10)
+           || (input_format == ATOMISP_INPUT_FORMAT_YUV420_16);
        if_a_config.block_no_reqs = (config->mode != IA_CSS_INPUT_MODE_SENSOR);
 
        if (two_ppc) {
@@ -449,9 +449,9 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
                if_b_config.buf_eol_offset =
                    buffer_width * bits_per_pixel / 8 - line_width;
                if_b_config.is_yuv420_format =
-                   input_format == IA_CSS_STREAM_FORMAT_YUV420_8
-                   || input_format == IA_CSS_STREAM_FORMAT_YUV420_10
-                   || input_format == IA_CSS_STREAM_FORMAT_YUV420_16;
+                   input_format == ATOMISP_INPUT_FORMAT_YUV420_8
+                   || input_format == ATOMISP_INPUT_FORMAT_YUV420_10
+                   || input_format == ATOMISP_INPUT_FORMAT_YUV420_16;
                if_b_config.block_no_reqs =
                    (config->mode != IA_CSS_INPUT_MODE_SENSOR);
 
index 47d0f7e53f473924eb64619879439c812c5f23e0..545f9e2da59e9b01ede84b174c091b974e0f7530 100644 (file)
@@ -42,12 +42,12 @@ void ia_css_inputfifo_send_input_frame(
        unsigned int    width,
        unsigned int    height,
        unsigned int    ch_id,
-       enum ia_css_stream_format       input_format,
+       enum atomisp_input_format       input_format,
        bool                    two_ppc);
 
 void ia_css_inputfifo_start_frame(
        unsigned int    ch_id,
-       enum ia_css_stream_format       input_format,
+       enum atomisp_input_format       input_format,
        bool                    two_ppc);
 
 void ia_css_inputfifo_send_line(
@@ -59,7 +59,7 @@ void ia_css_inputfifo_send_line(
 
 void ia_css_inputfifo_send_embedded_line(
        unsigned int    ch_id,
-       enum ia_css_stream_format       data_type,
+       enum atomisp_input_format       data_type,
        const unsigned short    *data,
        unsigned int    width);
 
index 8dc74927e9a2fb997e031189610f7088e94cf7bd..24ca4aaf8df1456520ba07076708724141eb8d41 100644 (file)
@@ -86,7 +86,7 @@ static unsigned int inputfifo_curr_ch_id, inputfifo_curr_fmt_type;
 #endif
 struct inputfifo_instance {
        unsigned int                            ch_id;
-       enum ia_css_stream_format       input_format;
+       enum atomisp_input_format       input_format;
        bool                                            two_ppc;
        bool                                            streaming;
        unsigned int                            hblank_cycles;
@@ -466,21 +466,21 @@ static void inputfifo_send_frame(
 
 
 static enum inputfifo_mipi_data_type inputfifo_determine_type(
-       enum ia_css_stream_format input_format)
+       enum atomisp_input_format input_format)
 {
        enum inputfifo_mipi_data_type type;
 
        type = inputfifo_mipi_data_type_regular;
-       if (input_format == IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY) {
+       if (input_format == ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY) {
                type =
                        inputfifo_mipi_data_type_yuv420_legacy;
-       } else if (input_format == IA_CSS_STREAM_FORMAT_YUV420_8  ||
-                  input_format == IA_CSS_STREAM_FORMAT_YUV420_10 ||
-                  input_format == IA_CSS_STREAM_FORMAT_YUV420_16) {
+       } else if (input_format == ATOMISP_INPUT_FORMAT_YUV420_8  ||
+                  input_format == ATOMISP_INPUT_FORMAT_YUV420_10 ||
+                  input_format == ATOMISP_INPUT_FORMAT_YUV420_16) {
                type =
                        inputfifo_mipi_data_type_yuv420;
-       } else if (input_format >= IA_CSS_STREAM_FORMAT_RGB_444 &&
-                  input_format <= IA_CSS_STREAM_FORMAT_RGB_888) {
+       } else if (input_format >= ATOMISP_INPUT_FORMAT_RGB_444 &&
+                  input_format <= ATOMISP_INPUT_FORMAT_RGB_888) {
                type =
                        inputfifo_mipi_data_type_rgb;
        }
@@ -500,7 +500,7 @@ void ia_css_inputfifo_send_input_frame(
        unsigned int width,
        unsigned int height,
        unsigned int ch_id,
-       enum ia_css_stream_format input_format,
+       enum atomisp_input_format input_format,
        bool two_ppc)
 {
        unsigned int fmt_type, hblank_cycles, marker_cycles;
@@ -524,7 +524,7 @@ void ia_css_inputfifo_send_input_frame(
 
 void ia_css_inputfifo_start_frame(
        unsigned int ch_id,
-       enum ia_css_stream_format input_format,
+       enum atomisp_input_format input_format,
        bool two_ppc)
 {
        struct inputfifo_instance *s2mi;
@@ -574,7 +574,7 @@ void ia_css_inputfifo_send_line(
 
 void ia_css_inputfifo_send_embedded_line(
        unsigned int    ch_id,
-       enum ia_css_stream_format       data_type,
+       enum atomisp_input_format       data_type,
        const unsigned short    *data,
        unsigned int    width)
 {
index 4cf2defe9ef032589a144a68240224f2caca6f21..8c005db9766e2fa09e9fe9126b42dcb54df72e60 100644 (file)
@@ -50,8 +50,8 @@ typedef input_system_cfg_t    ia_css_isys_descr_t;
 #if defined(USE_INPUT_SYSTEM_VERSION_2) || defined(USE_INPUT_SYSTEM_VERSION_2401)
 input_system_error_t ia_css_isys_init(void);
 void ia_css_isys_uninit(void);
-mipi_port_ID_t ia_css_isys_port_to_mipi_port(
-       enum ia_css_csi2_port api_port);
+enum mipi_port_id ia_css_isys_port_to_mipi_port(
+       enum mipi_port_id api_port);
 #endif
 
 #if defined(USE_INPUT_SYSTEM_VERSION_2401)
@@ -68,7 +68,7 @@ mipi_port_ID_t ia_css_isys_port_to_mipi_port(
  *                             there is already a stream registered with the same handle
  */
 enum ia_css_err ia_css_isys_csi_rx_register_stream(
-       enum ia_css_csi2_port port,
+       enum mipi_port_id port,
        uint32_t isys_stream_id);
 
 /**
@@ -83,14 +83,14 @@ enum ia_css_err ia_css_isys_csi_rx_register_stream(
  *                             there is no stream registered with that handle
  */
 enum ia_css_err ia_css_isys_csi_rx_unregister_stream(
-       enum ia_css_csi2_port port,
+       enum mipi_port_id port,
        uint32_t isys_stream_id);
 
 enum ia_css_err ia_css_isys_convert_compressed_format(
                struct ia_css_csi2_compression *comp,
                struct input_system_cfg_s *cfg);
 unsigned int ia_css_csi2_calculate_input_system_alignment(
-       enum ia_css_stream_format fmt_type);
+       enum atomisp_input_format fmt_type);
 #endif
 
 #if !defined(USE_INPUT_SYSTEM_VERSION_2401)
@@ -101,12 +101,12 @@ void ia_css_isys_rx_configure(
 
 void ia_css_isys_rx_disable(void);
 
-void ia_css_isys_rx_enable_all_interrupts(mipi_port_ID_t port);
+void ia_css_isys_rx_enable_all_interrupts(enum mipi_port_id port);
 
-unsigned int ia_css_isys_rx_get_interrupt_reg(mipi_port_ID_t port);
-void ia_css_isys_rx_get_irq_info(mipi_port_ID_t port,
+unsigned int ia_css_isys_rx_get_interrupt_reg(enum mipi_port_id port);
+void ia_css_isys_rx_get_irq_info(enum mipi_port_id port,
                                 unsigned int *irq_infos);
-void ia_css_isys_rx_clear_irq_info(mipi_port_ID_t port,
+void ia_css_isys_rx_clear_irq_info(enum mipi_port_id port,
                                   unsigned int irq_infos);
 unsigned int ia_css_isys_rx_translate_irq_infos(unsigned int bits);
 
@@ -124,7 +124,7 @@ unsigned int ia_css_isys_rx_translate_irq_infos(unsigned int bits);
  * format type must be sumitted correctly by the application.
  */
 enum ia_css_err ia_css_isys_convert_stream_format_to_mipi_format(
-               enum ia_css_stream_format input_format,
+               enum atomisp_input_format input_format,
                mipi_predictor_t compression,
                unsigned int *fmt_type);
 
index 3b04dc51335a50a859805bc5f0999cb0ac999eba..a914ce5532ec9451d267b380f203f114a7416515 100644 (file)
@@ -141,7 +141,7 @@ void ia_css_isys_csi_rx_lut_rmgr_release(
 }
 
 enum ia_css_err ia_css_isys_csi_rx_register_stream(
-       enum ia_css_csi2_port port,
+       enum mipi_port_id port,
        uint32_t isys_stream_id)
 {
        enum ia_css_err retval = IA_CSS_ERR_INTERNAL_ERROR;
@@ -160,7 +160,7 @@ enum ia_css_err ia_css_isys_csi_rx_register_stream(
 }
 
 enum ia_css_err ia_css_isys_csi_rx_unregister_stream(
-       enum ia_css_csi2_port port,
+       enum mipi_port_id port,
        uint32_t isys_stream_id)
 {
        enum ia_css_err retval = IA_CSS_ERR_INTERNAL_ERROR;
index 4122084fd237443fe4fe39672c55366af9553fc9..2ae5e59d5e31f729ae8239625fba77e5a3374671 100644 (file)
@@ -105,8 +105,6 @@ input_system_error_t ia_css_isys_init(void)
 #elif defined(USE_INPUT_SYSTEM_VERSION_2401)
 input_system_error_t ia_css_isys_init(void)
 {
-       input_system_error_t error = INPUT_SYSTEM_ERR_NO_ERROR;
-
        ia_css_isys_csi_rx_lut_rmgr_init();
        ia_css_isys_ibuf_rmgr_init();
        ia_css_isys_dma_channel_rmgr_init();
@@ -120,7 +118,7 @@ input_system_error_t ia_css_isys_init(void)
        isys_irqc_status_enable(ISYS_IRQ1_ID);
        isys_irqc_status_enable(ISYS_IRQ2_ID);
 
-       return error;
+       return INPUT_SYSTEM_ERR_NO_ERROR;
 }
 #endif
 
index 70f6cb5e5918d185fb9f23b1746e39c808c3bd39..425bd3cc3f343c32aedcaff2d70f059c07f496c1 100644 (file)
@@ -36,7 +36,7 @@ more details.
 #include "sh_css_internal.h"
 
 #if !defined(USE_INPUT_SYSTEM_VERSION_2401)
-void ia_css_isys_rx_enable_all_interrupts(mipi_port_ID_t port)
+void ia_css_isys_rx_enable_all_interrupts(enum mipi_port_id port)
 {
        hrt_data bits = receiver_port_reg_load(RX0_ID,
                                port,
@@ -80,22 +80,22 @@ void ia_css_isys_rx_enable_all_interrupts(mipi_port_ID_t port)
  * initializers in Windows. Without that there is no easy way to guarantee
  * that the array values would be in the correct order.
  * */
-mipi_port_ID_t ia_css_isys_port_to_mipi_port(enum ia_css_csi2_port api_port)
+enum mipi_port_id ia_css_isys_port_to_mipi_port(enum mipi_port_id api_port)
 {
        /* In this module the validity of the inptu variable should
         * have been checked already, so we do not check for erroneous
         * values. */
-       mipi_port_ID_t port = MIPI_PORT0_ID;
+       enum mipi_port_id port = MIPI_PORT0_ID;
 
-       if (api_port == IA_CSS_CSI2_PORT1)
+       if (api_port == MIPI_PORT1_ID)
                port = MIPI_PORT1_ID;
-       else if (api_port == IA_CSS_CSI2_PORT2)
+       else if (api_port == MIPI_PORT2_ID)
                port = MIPI_PORT2_ID;
 
        return port;
 }
 
-unsigned int ia_css_isys_rx_get_interrupt_reg(mipi_port_ID_t port)
+unsigned int ia_css_isys_rx_get_interrupt_reg(enum mipi_port_id port)
 {
        return receiver_port_reg_load(RX0_ID,
                                      port,
@@ -104,17 +104,17 @@ unsigned int ia_css_isys_rx_get_interrupt_reg(mipi_port_ID_t port)
 
 void ia_css_rx_get_irq_info(unsigned int *irq_infos)
 {
-       ia_css_rx_port_get_irq_info(IA_CSS_CSI2_PORT1, irq_infos);
+       ia_css_rx_port_get_irq_info(MIPI_PORT1_ID, irq_infos);
 }
 
-void ia_css_rx_port_get_irq_info(enum ia_css_csi2_port api_port,
+void ia_css_rx_port_get_irq_info(enum mipi_port_id api_port,
                                 unsigned int *irq_infos)
 {
-       mipi_port_ID_t port = ia_css_isys_port_to_mipi_port(api_port);
+       enum mipi_port_id port = ia_css_isys_port_to_mipi_port(api_port);
        ia_css_isys_rx_get_irq_info(port, irq_infos);
 }
 
-void ia_css_isys_rx_get_irq_info(mipi_port_ID_t port,
+void ia_css_isys_rx_get_irq_info(enum mipi_port_id port,
                                 unsigned int *irq_infos)
 {
        unsigned int bits;
@@ -169,16 +169,16 @@ unsigned int ia_css_isys_rx_translate_irq_infos(unsigned int bits)
 
 void ia_css_rx_clear_irq_info(unsigned int irq_infos)
 {
-       ia_css_rx_port_clear_irq_info(IA_CSS_CSI2_PORT1, irq_infos);
+       ia_css_rx_port_clear_irq_info(MIPI_PORT1_ID, irq_infos);
 }
 
-void ia_css_rx_port_clear_irq_info(enum ia_css_csi2_port api_port, unsigned int irq_infos)
+void ia_css_rx_port_clear_irq_info(enum mipi_port_id api_port, unsigned int irq_infos)
 {
-       mipi_port_ID_t port = ia_css_isys_port_to_mipi_port(api_port);
+       enum mipi_port_id port = ia_css_isys_port_to_mipi_port(api_port);
        ia_css_isys_rx_clear_irq_info(port, irq_infos);
 }
 
-void ia_css_isys_rx_clear_irq_info(mipi_port_ID_t port, unsigned int irq_infos)
+void ia_css_isys_rx_clear_irq_info(enum mipi_port_id port, unsigned int irq_infos)
 {
        hrt_data bits = receiver_port_reg_load(RX0_ID,
                                port,
@@ -229,7 +229,7 @@ void ia_css_isys_rx_clear_irq_info(mipi_port_ID_t port, unsigned int irq_infos)
 #endif /* #if !defined(USE_INPUT_SYSTEM_VERSION_2401) */
 
 enum ia_css_err ia_css_isys_convert_stream_format_to_mipi_format(
-               enum ia_css_stream_format input_format,
+               enum atomisp_input_format input_format,
                mipi_predictor_t compression,
                unsigned int *fmt_type)
 {
@@ -244,25 +244,25 @@ enum ia_css_err ia_css_isys_convert_stream_format_to_mipi_format(
         */
        if (compression != MIPI_PREDICTOR_NONE) {
                switch (input_format) {
-               case IA_CSS_STREAM_FORMAT_RAW_6:
+               case ATOMISP_INPUT_FORMAT_RAW_6:
                        *fmt_type = 6;
                        break;
-               case IA_CSS_STREAM_FORMAT_RAW_7:
+               case ATOMISP_INPUT_FORMAT_RAW_7:
                        *fmt_type = 7;
                        break;
-               case IA_CSS_STREAM_FORMAT_RAW_8:
+               case ATOMISP_INPUT_FORMAT_RAW_8:
                        *fmt_type = 8;
                        break;
-               case IA_CSS_STREAM_FORMAT_RAW_10:
+               case ATOMISP_INPUT_FORMAT_RAW_10:
                        *fmt_type = 10;
                        break;
-               case IA_CSS_STREAM_FORMAT_RAW_12:
+               case ATOMISP_INPUT_FORMAT_RAW_12:
                        *fmt_type = 12;
                        break;
-               case IA_CSS_STREAM_FORMAT_RAW_14:
+               case ATOMISP_INPUT_FORMAT_RAW_14:
                        *fmt_type = 14;
                        break;
-               case IA_CSS_STREAM_FORMAT_RAW_16:
+               case ATOMISP_INPUT_FORMAT_RAW_16:
                        *fmt_type = 16;
                        break;
                default:
@@ -277,96 +277,96 @@ enum ia_css_err ia_css_isys_convert_stream_format_to_mipi_format(
         * MW: For some reason the mapping is not 1-to-1
         */
        switch (input_format) {
-       case IA_CSS_STREAM_FORMAT_RGB_888:
+       case ATOMISP_INPUT_FORMAT_RGB_888:
                *fmt_type = MIPI_FORMAT_RGB888;
                break;
-       case IA_CSS_STREAM_FORMAT_RGB_555:
+       case ATOMISP_INPUT_FORMAT_RGB_555:
                *fmt_type = MIPI_FORMAT_RGB555;
                break;
-       case IA_CSS_STREAM_FORMAT_RGB_444:
+       case ATOMISP_INPUT_FORMAT_RGB_444:
                *fmt_type = MIPI_FORMAT_RGB444;
                break;
-       case IA_CSS_STREAM_FORMAT_RGB_565:
+       case ATOMISP_INPUT_FORMAT_RGB_565:
                *fmt_type = MIPI_FORMAT_RGB565;
                break;
-       case IA_CSS_STREAM_FORMAT_RGB_666:
+       case ATOMISP_INPUT_FORMAT_RGB_666:
                *fmt_type = MIPI_FORMAT_RGB666;
                break;
-       case IA_CSS_STREAM_FORMAT_RAW_8:
+       case ATOMISP_INPUT_FORMAT_RAW_8:
                *fmt_type = MIPI_FORMAT_RAW8;
                break;
-       case IA_CSS_STREAM_FORMAT_RAW_10:
+       case ATOMISP_INPUT_FORMAT_RAW_10:
                *fmt_type = MIPI_FORMAT_RAW10;
                break;
-       case IA_CSS_STREAM_FORMAT_RAW_6:
+       case ATOMISP_INPUT_FORMAT_RAW_6:
                *fmt_type = MIPI_FORMAT_RAW6;
                break;
-       case IA_CSS_STREAM_FORMAT_RAW_7:
+       case ATOMISP_INPUT_FORMAT_RAW_7:
                *fmt_type = MIPI_FORMAT_RAW7;
                break;
-       case IA_CSS_STREAM_FORMAT_RAW_12:
+       case ATOMISP_INPUT_FORMAT_RAW_12:
                *fmt_type = MIPI_FORMAT_RAW12;
                break;
-       case IA_CSS_STREAM_FORMAT_RAW_14:
+       case ATOMISP_INPUT_FORMAT_RAW_14:
                *fmt_type = MIPI_FORMAT_RAW14;
                break;
-       case IA_CSS_STREAM_FORMAT_YUV420_8:
+       case ATOMISP_INPUT_FORMAT_YUV420_8:
                *fmt_type = MIPI_FORMAT_YUV420_8;
                break;
-       case IA_CSS_STREAM_FORMAT_YUV420_10:
+       case ATOMISP_INPUT_FORMAT_YUV420_10:
                *fmt_type = MIPI_FORMAT_YUV420_10;
                break;
-       case IA_CSS_STREAM_FORMAT_YUV422_8:
+       case ATOMISP_INPUT_FORMAT_YUV422_8:
                *fmt_type = MIPI_FORMAT_YUV422_8;
                break;
-       case IA_CSS_STREAM_FORMAT_YUV422_10:
+       case ATOMISP_INPUT_FORMAT_YUV422_10:
                *fmt_type = MIPI_FORMAT_YUV422_10;
                break;
-       case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
+       case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
                *fmt_type = MIPI_FORMAT_YUV420_8_LEGACY;
                break;
-       case IA_CSS_STREAM_FORMAT_EMBEDDED:
+       case ATOMISP_INPUT_FORMAT_EMBEDDED:
                *fmt_type = MIPI_FORMAT_EMBEDDED;
                break;
 #ifndef USE_INPUT_SYSTEM_VERSION_2401
-       case IA_CSS_STREAM_FORMAT_RAW_16:
+       case ATOMISP_INPUT_FORMAT_RAW_16:
                /* This is not specified by Arasan, so we use
                 * 17 for now.
                 */
                *fmt_type = MIPI_FORMAT_RAW16;
                break;
-       case IA_CSS_STREAM_FORMAT_BINARY_8:
+       case ATOMISP_INPUT_FORMAT_BINARY_8:
                *fmt_type = MIPI_FORMAT_BINARY_8;
                break;
 #else
-       case IA_CSS_STREAM_FORMAT_USER_DEF1:
+       case ATOMISP_INPUT_FORMAT_USER_DEF1:
                *fmt_type = MIPI_FORMAT_CUSTOM0;
                break;
-       case IA_CSS_STREAM_FORMAT_USER_DEF2:
+       case ATOMISP_INPUT_FORMAT_USER_DEF2:
                *fmt_type = MIPI_FORMAT_CUSTOM1;
                break;
-       case IA_CSS_STREAM_FORMAT_USER_DEF3:
+       case ATOMISP_INPUT_FORMAT_USER_DEF3:
                *fmt_type = MIPI_FORMAT_CUSTOM2;
                break;
-       case IA_CSS_STREAM_FORMAT_USER_DEF4:
+       case ATOMISP_INPUT_FORMAT_USER_DEF4:
                *fmt_type = MIPI_FORMAT_CUSTOM3;
                break;
-       case IA_CSS_STREAM_FORMAT_USER_DEF5:
+       case ATOMISP_INPUT_FORMAT_USER_DEF5:
                *fmt_type = MIPI_FORMAT_CUSTOM4;
                break;
-       case IA_CSS_STREAM_FORMAT_USER_DEF6:
+       case ATOMISP_INPUT_FORMAT_USER_DEF6:
                *fmt_type = MIPI_FORMAT_CUSTOM5;
                break;
-       case IA_CSS_STREAM_FORMAT_USER_DEF7:
+       case ATOMISP_INPUT_FORMAT_USER_DEF7:
                *fmt_type = MIPI_FORMAT_CUSTOM6;
                break;
-       case IA_CSS_STREAM_FORMAT_USER_DEF8:
+       case ATOMISP_INPUT_FORMAT_USER_DEF8:
                *fmt_type = MIPI_FORMAT_CUSTOM7;
                break;
 #endif
 
-       case IA_CSS_STREAM_FORMAT_YUV420_16:
-       case IA_CSS_STREAM_FORMAT_YUV422_16:
+       case ATOMISP_INPUT_FORMAT_YUV420_16:
+       case ATOMISP_INPUT_FORMAT_YUV422_16:
        default:
                return IA_CSS_ERR_INTERNAL_ERROR;
        }
@@ -448,34 +448,34 @@ enum ia_css_err ia_css_isys_convert_compressed_format(
 }
 
 unsigned int ia_css_csi2_calculate_input_system_alignment(
-       enum ia_css_stream_format fmt_type)
+       enum atomisp_input_format fmt_type)
 {
        unsigned int memory_alignment_in_bytes = HIVE_ISP_DDR_WORD_BYTES;
 
        switch (fmt_type) {
-       case IA_CSS_STREAM_FORMAT_RAW_6:
-       case IA_CSS_STREAM_FORMAT_RAW_7:
-       case IA_CSS_STREAM_FORMAT_RAW_8:
-       case IA_CSS_STREAM_FORMAT_RAW_10:
-       case IA_CSS_STREAM_FORMAT_RAW_12:
-       case IA_CSS_STREAM_FORMAT_RAW_14:
+       case ATOMISP_INPUT_FORMAT_RAW_6:
+       case ATOMISP_INPUT_FORMAT_RAW_7:
+       case ATOMISP_INPUT_FORMAT_RAW_8:
+       case ATOMISP_INPUT_FORMAT_RAW_10:
+       case ATOMISP_INPUT_FORMAT_RAW_12:
+       case ATOMISP_INPUT_FORMAT_RAW_14:
                memory_alignment_in_bytes = 2 * ISP_VEC_NELEMS;
                break;
-       case IA_CSS_STREAM_FORMAT_YUV420_8:
-       case IA_CSS_STREAM_FORMAT_YUV422_8:
-       case IA_CSS_STREAM_FORMAT_USER_DEF1:
-       case IA_CSS_STREAM_FORMAT_USER_DEF2:
-       case IA_CSS_STREAM_FORMAT_USER_DEF3:
-       case IA_CSS_STREAM_FORMAT_USER_DEF4:
-       case IA_CSS_STREAM_FORMAT_USER_DEF5:
-       case IA_CSS_STREAM_FORMAT_USER_DEF6:
-       case IA_CSS_STREAM_FORMAT_USER_DEF7:
-       case IA_CSS_STREAM_FORMAT_USER_DEF8:
+       case ATOMISP_INPUT_FORMAT_YUV420_8:
+       case ATOMISP_INPUT_FORMAT_YUV422_8:
+       case ATOMISP_INPUT_FORMAT_USER_DEF1:
+       case ATOMISP_INPUT_FORMAT_USER_DEF2:
+       case ATOMISP_INPUT_FORMAT_USER_DEF3:
+       case ATOMISP_INPUT_FORMAT_USER_DEF4:
+       case ATOMISP_INPUT_FORMAT_USER_DEF5:
+       case ATOMISP_INPUT_FORMAT_USER_DEF6:
+       case ATOMISP_INPUT_FORMAT_USER_DEF7:
+       case ATOMISP_INPUT_FORMAT_USER_DEF8:
                /* Planar YUV formats need to have all planes aligned, this means
                 * double the alignment for the Y plane if the horizontal decimation is 2. */
                memory_alignment_in_bytes = 2 * HIVE_ISP_DDR_WORD_BYTES;
                break;
-       case IA_CSS_STREAM_FORMAT_EMBEDDED:
+       case ATOMISP_INPUT_FORMAT_EMBEDDED:
        default:
                memory_alignment_in_bytes = HIVE_ISP_DDR_WORD_BYTES;
                break;
@@ -492,7 +492,7 @@ void ia_css_isys_rx_configure(const rx_cfg_t *config,
 #if defined(HAS_RX_VERSION_2)
        bool port_enabled[N_MIPI_PORT_ID];
        bool any_port_enabled = false;
-       mipi_port_ID_t port;
+       enum mipi_port_id port;
 
        if ((config == NULL)
                || (config->mode >= N_RX_MODE)
@@ -500,7 +500,7 @@ void ia_css_isys_rx_configure(const rx_cfg_t *config,
                assert(0);
                return;
        }
-       for (port = (mipi_port_ID_t) 0; port < N_MIPI_PORT_ID; port++) {
+       for (port = (enum mipi_port_id) 0; port < N_MIPI_PORT_ID; port++) {
                if (is_receiver_port_enabled(RX0_ID, port))
                        any_port_enabled = true;
        }
@@ -595,8 +595,8 @@ void ia_css_isys_rx_configure(const rx_cfg_t *config,
 
 void ia_css_isys_rx_disable(void)
 {
-       mipi_port_ID_t port;
-       for (port = (mipi_port_ID_t) 0; port < N_MIPI_PORT_ID; port++) {
+       enum mipi_port_id port;
+       for (port = (enum mipi_port_id) 0; port < N_MIPI_PORT_ID; port++) {
                receiver_port_reg_store(RX0_ID, port,
                                        _HRT_CSS_RECEIVER_DEVICE_READY_REG_IDX,
                                        false);
index 90922a7acefdcaca2f6aba26fed2c60c5adc6cb2..2484949453b7ccef8d9a725ee44df44d1b9a1d90 100644 (file)
@@ -331,7 +331,7 @@ static bool create_input_system_channel(
                break;
        }
 
-       if (rc == false)
+       if (!rc)
                return false;
 
        if (!acquire_sid(me->stream2mmio_id, &(me->stream2mmio_sid_id))) {
@@ -474,7 +474,7 @@ static bool calculate_input_system_channel_cfg(
 
        rc = calculate_stream2mmio_cfg(isys_cfg, metadata,
                        &(channel_cfg->stream2mmio_cfg));
-       if (rc == false)
+       if (!rc)
                return false;
 
        rc = calculate_ibuf_ctrl_cfg(
@@ -482,7 +482,7 @@ static bool calculate_input_system_channel_cfg(
                        input_port,
                        isys_cfg,
                        &(channel_cfg->ibuf_ctrl_cfg));
-       if (rc == false)
+       if (!rc)
                return false;
        if (metadata)
                channel_cfg->ibuf_ctrl_cfg.stores_per_frame = isys_cfg->metadata.lines_per_frame;
@@ -491,7 +491,7 @@ static bool calculate_input_system_channel_cfg(
                        channel,
                        isys_cfg,
                        &(channel_cfg->dma_cfg));
-       if (rc == false)
+       if (!rc)
                return false;
 
        rc = calculate_isys2401_dma_port_cfg(
@@ -499,7 +499,7 @@ static bool calculate_input_system_channel_cfg(
                        false,
                        metadata,
                        &(channel_cfg->dma_src_port_cfg));
-       if (rc == false)
+       if (!rc)
                return false;
 
        rc = calculate_isys2401_dma_port_cfg(
@@ -507,7 +507,7 @@ static bool calculate_input_system_channel_cfg(
                        isys_cfg->raw_packed,
                        metadata,
                        &(channel_cfg->dma_dest_port_cfg));
-       if (rc == false)
+       if (!rc)
                return false;
 
        return true;
index 81a50c73ad0bdc6acea0716279f1300ecfaaf124..4746620ca2129dfd316c83be831c8ad4bdabd9e8 100644 (file)
@@ -161,9 +161,9 @@ void ia_css_pipeline_start(enum ia_css_pipe_id pipe_id,
 #endif
 #if !defined(HAS_NO_INPUT_SYSTEM)
 #ifndef ISP2401
-                               , (mipi_port_ID_t) 0
+                               , (enum mipi_port_id) 0
 #else
-                               (mipi_port_ID_t) 0,
+                               (enum mipi_port_id) 0,
 #endif
 #endif
 #ifndef ISP2401
@@ -574,7 +574,7 @@ static void pipeline_map_num_to_sp_thread(unsigned int pipe_num)
 
                But the below is more descriptive.
        */
-       assert(found_sp_thread != false);
+       assert(found_sp_thread);
 }
 
 static void pipeline_unmap_num_to_sp_thread(unsigned int pipe_num)
index 54239ac9d7c90a22c73e5781815fa1ae87bf8308..a4d8a48f95ba9a20d861aa0e4dc5af3bb9d4e237 100644 (file)
  * @brief VBUF resource handles
  */
 #define NUM_HANDLES 1000
-struct ia_css_rmgr_vbuf_handle handle_table[NUM_HANDLES];
+static struct ia_css_rmgr_vbuf_handle handle_table[NUM_HANDLES];
 
 /*
  * @brief VBUF resource pool - refpool
  */
-struct ia_css_rmgr_vbuf_pool refpool = {
+static struct ia_css_rmgr_vbuf_pool refpool = {
        false,                  /* copy_on_write */
        false,                  /* recycle */
        0,                      /* size */
@@ -40,7 +40,7 @@ struct ia_css_rmgr_vbuf_pool refpool = {
 /*
  * @brief VBUF resource pool - writepool
  */
-struct ia_css_rmgr_vbuf_pool writepool = {
+static struct ia_css_rmgr_vbuf_pool writepool = {
        true,                   /* copy_on_write */
        false,                  /* recycle */
        0,                      /* size */
@@ -51,7 +51,7 @@ struct ia_css_rmgr_vbuf_pool writepool = {
 /*
  * @brief VBUF resource pool - hmmbufferpool
  */
-struct ia_css_rmgr_vbuf_pool hmmbufferpool = {
+static struct ia_css_rmgr_vbuf_pool hmmbufferpool = {
        true,                   /* copy_on_write */
        true,                   /* recycle */
        32,                     /* size */
index 37116faab6315153eb10773d0a47274dd88caaef..c771e4b910f3d5d6dbcf5b1b978787c0f7ed9c2a 100644 (file)
@@ -462,46 +462,46 @@ verify_copy_out_frame_format(struct ia_css_pipe *pipe)
        assert(pipe->stream != NULL);
 
        switch (pipe->stream->config.input_config.format) {
-       case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
-       case IA_CSS_STREAM_FORMAT_YUV420_8:
+       case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
+       case ATOMISP_INPUT_FORMAT_YUV420_8:
                for (i=0; i<ARRAY_SIZE(yuv420_copy_formats) && !found; i++)
                        found = (out_fmt == yuv420_copy_formats[i]);
                break;
-       case IA_CSS_STREAM_FORMAT_YUV420_10:
-       case IA_CSS_STREAM_FORMAT_YUV420_16:
+       case ATOMISP_INPUT_FORMAT_YUV420_10:
+       case ATOMISP_INPUT_FORMAT_YUV420_16:
                found = (out_fmt == IA_CSS_FRAME_FORMAT_YUV420_16);
                break;
-       case IA_CSS_STREAM_FORMAT_YUV422_8:
+       case ATOMISP_INPUT_FORMAT_YUV422_8:
                for (i=0; i<ARRAY_SIZE(yuv422_copy_formats) && !found; i++)
                        found = (out_fmt == yuv422_copy_formats[i]);
                break;
-       case IA_CSS_STREAM_FORMAT_YUV422_10:
-       case IA_CSS_STREAM_FORMAT_YUV422_16:
+       case ATOMISP_INPUT_FORMAT_YUV422_10:
+       case ATOMISP_INPUT_FORMAT_YUV422_16:
                found = (out_fmt == IA_CSS_FRAME_FORMAT_YUV422_16 ||
                         out_fmt == IA_CSS_FRAME_FORMAT_YUV420_16);
                break;
-       case IA_CSS_STREAM_FORMAT_RGB_444:
-       case IA_CSS_STREAM_FORMAT_RGB_555:
-       case IA_CSS_STREAM_FORMAT_RGB_565:
+       case ATOMISP_INPUT_FORMAT_RGB_444:
+       case ATOMISP_INPUT_FORMAT_RGB_555:
+       case ATOMISP_INPUT_FORMAT_RGB_565:
                found = (out_fmt == IA_CSS_FRAME_FORMAT_RGBA888 ||
                         out_fmt == IA_CSS_FRAME_FORMAT_RGB565);
                break;
-       case IA_CSS_STREAM_FORMAT_RGB_666:
-       case IA_CSS_STREAM_FORMAT_RGB_888:
+       case ATOMISP_INPUT_FORMAT_RGB_666:
+       case ATOMISP_INPUT_FORMAT_RGB_888:
                found = (out_fmt == IA_CSS_FRAME_FORMAT_RGBA888 ||
                         out_fmt == IA_CSS_FRAME_FORMAT_YUV420);
                break;
-       case IA_CSS_STREAM_FORMAT_RAW_6:
-       case IA_CSS_STREAM_FORMAT_RAW_7:
-       case IA_CSS_STREAM_FORMAT_RAW_8:
-       case IA_CSS_STREAM_FORMAT_RAW_10:
-       case IA_CSS_STREAM_FORMAT_RAW_12:
-       case IA_CSS_STREAM_FORMAT_RAW_14:
-       case IA_CSS_STREAM_FORMAT_RAW_16:
+       case ATOMISP_INPUT_FORMAT_RAW_6:
+       case ATOMISP_INPUT_FORMAT_RAW_7:
+       case ATOMISP_INPUT_FORMAT_RAW_8:
+       case ATOMISP_INPUT_FORMAT_RAW_10:
+       case ATOMISP_INPUT_FORMAT_RAW_12:
+       case ATOMISP_INPUT_FORMAT_RAW_14:
+       case ATOMISP_INPUT_FORMAT_RAW_16:
                found = (out_fmt == IA_CSS_FRAME_FORMAT_RAW) ||
                        (out_fmt == IA_CSS_FRAME_FORMAT_RAW_PACKED);
                break;
-       case IA_CSS_STREAM_FORMAT_BINARY_8:
+       case ATOMISP_INPUT_FORMAT_BINARY_8:
                found = (out_fmt == IA_CSS_FRAME_FORMAT_BINARY_8);
                break;
        default:
@@ -586,13 +586,13 @@ sh_css_config_input_network(struct ia_css_stream *stream)
 }
 #elif !defined(HAS_NO_INPUT_SYSTEM) && defined(USE_INPUT_SYSTEM_VERSION_2401)
 static unsigned int csi2_protocol_calculate_max_subpixels_per_line(
-               enum ia_css_stream_format       format,
+               enum atomisp_input_format       format,
                unsigned int                    pixels_per_line)
 {
        unsigned int rval;
 
        switch (format) {
-       case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
+       case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
                /*
                 * The frame format layout is shown below.
                 *
@@ -611,9 +611,9 @@ static unsigned int csi2_protocol_calculate_max_subpixels_per_line(
                 */
                rval = pixels_per_line * 2;
                break;
-       case IA_CSS_STREAM_FORMAT_YUV420_8:
-       case IA_CSS_STREAM_FORMAT_YUV420_10:
-       case IA_CSS_STREAM_FORMAT_YUV420_16:
+       case ATOMISP_INPUT_FORMAT_YUV420_8:
+       case ATOMISP_INPUT_FORMAT_YUV420_10:
+       case ATOMISP_INPUT_FORMAT_YUV420_16:
                /*
                 * The frame format layout is shown below.
                 *
@@ -630,9 +630,9 @@ static unsigned int csi2_protocol_calculate_max_subpixels_per_line(
                 */
                rval = pixels_per_line * 2;
                break;
-       case IA_CSS_STREAM_FORMAT_YUV422_8:
-       case IA_CSS_STREAM_FORMAT_YUV422_10:
-       case IA_CSS_STREAM_FORMAT_YUV422_16:
+       case ATOMISP_INPUT_FORMAT_YUV422_8:
+       case ATOMISP_INPUT_FORMAT_YUV422_10:
+       case ATOMISP_INPUT_FORMAT_YUV422_16:
                /*
                 * The frame format layout is shown below.
                 *
@@ -649,11 +649,11 @@ static unsigned int csi2_protocol_calculate_max_subpixels_per_line(
                 */
                rval = pixels_per_line * 2;
                break;
-       case IA_CSS_STREAM_FORMAT_RGB_444:
-       case IA_CSS_STREAM_FORMAT_RGB_555:
-       case IA_CSS_STREAM_FORMAT_RGB_565:
-       case IA_CSS_STREAM_FORMAT_RGB_666:
-       case IA_CSS_STREAM_FORMAT_RGB_888:
+       case ATOMISP_INPUT_FORMAT_RGB_444:
+       case ATOMISP_INPUT_FORMAT_RGB_555:
+       case ATOMISP_INPUT_FORMAT_RGB_565:
+       case ATOMISP_INPUT_FORMAT_RGB_666:
+       case ATOMISP_INPUT_FORMAT_RGB_888:
                /*
                 * The frame format layout is shown below.
                 *
@@ -670,22 +670,22 @@ static unsigned int csi2_protocol_calculate_max_subpixels_per_line(
                 */
                rval = pixels_per_line * 4;
                break;
-       case IA_CSS_STREAM_FORMAT_RAW_6:
-       case IA_CSS_STREAM_FORMAT_RAW_7:
-       case IA_CSS_STREAM_FORMAT_RAW_8:
-       case IA_CSS_STREAM_FORMAT_RAW_10:
-       case IA_CSS_STREAM_FORMAT_RAW_12:
-       case IA_CSS_STREAM_FORMAT_RAW_14:
-       case IA_CSS_STREAM_FORMAT_RAW_16:
-       case IA_CSS_STREAM_FORMAT_BINARY_8:
-       case IA_CSS_STREAM_FORMAT_USER_DEF1:
-       case IA_CSS_STREAM_FORMAT_USER_DEF2:
-       case IA_CSS_STREAM_FORMAT_USER_DEF3:
-       case IA_CSS_STREAM_FORMAT_USER_DEF4:
-       case IA_CSS_STREAM_FORMAT_USER_DEF5:
-       case IA_CSS_STREAM_FORMAT_USER_DEF6:
-       case IA_CSS_STREAM_FORMAT_USER_DEF7:
-       case IA_CSS_STREAM_FORMAT_USER_DEF8:
+       case ATOMISP_INPUT_FORMAT_RAW_6:
+       case ATOMISP_INPUT_FORMAT_RAW_7:
+       case ATOMISP_INPUT_FORMAT_RAW_8:
+       case ATOMISP_INPUT_FORMAT_RAW_10:
+       case ATOMISP_INPUT_FORMAT_RAW_12:
+       case ATOMISP_INPUT_FORMAT_RAW_14:
+       case ATOMISP_INPUT_FORMAT_RAW_16:
+       case ATOMISP_INPUT_FORMAT_BINARY_8:
+       case ATOMISP_INPUT_FORMAT_USER_DEF1:
+       case ATOMISP_INPUT_FORMAT_USER_DEF2:
+       case ATOMISP_INPUT_FORMAT_USER_DEF3:
+       case ATOMISP_INPUT_FORMAT_USER_DEF4:
+       case ATOMISP_INPUT_FORMAT_USER_DEF5:
+       case ATOMISP_INPUT_FORMAT_USER_DEF6:
+       case ATOMISP_INPUT_FORMAT_USER_DEF7:
+       case ATOMISP_INPUT_FORMAT_USER_DEF8:
                /*
                 * The frame format layout is shown below.
                 *
@@ -742,11 +742,11 @@ static bool sh_css_translate_stream_cfg_to_input_system_input_port_id(
                break;
        case IA_CSS_INPUT_MODE_BUFFERED_SENSOR:
 
-               if (stream_cfg->source.port.port == IA_CSS_CSI2_PORT0) {
+               if (stream_cfg->source.port.port == MIPI_PORT0_ID) {
                        isys_stream_descr->input_port_id = INPUT_SYSTEM_CSI_PORT0_ID;
-               } else if (stream_cfg->source.port.port == IA_CSS_CSI2_PORT1) {
+               } else if (stream_cfg->source.port.port == MIPI_PORT1_ID) {
                        isys_stream_descr->input_port_id = INPUT_SYSTEM_CSI_PORT1_ID;
-               } else if (stream_cfg->source.port.port == IA_CSS_CSI2_PORT2) {
+               } else if (stream_cfg->source.port.port == MIPI_PORT2_ID) {
                        isys_stream_descr->input_port_id = INPUT_SYSTEM_CSI_PORT2_ID;
                }
 
@@ -927,7 +927,7 @@ static bool sh_css_translate_stream_cfg_to_input_system_input_port_resolution(
        unsigned int max_subpixels_per_line;
        unsigned int lines_per_frame;
        unsigned int align_req_in_bytes;
-       enum ia_css_stream_format fmt_type;
+       enum atomisp_input_format fmt_type;
 
        fmt_type = stream_cfg->isys_config[isys_stream_idx].format;
        if ((stream_cfg->mode == IA_CSS_INPUT_MODE_SENSOR ||
@@ -936,11 +936,11 @@ static bool sh_css_translate_stream_cfg_to_input_system_input_port_resolution(
 
                if (stream_cfg->source.port.compression.uncompressed_bits_per_pixel ==
                        UNCOMPRESSED_BITS_PER_PIXEL_10) {
-                               fmt_type = IA_CSS_STREAM_FORMAT_RAW_10;
+                               fmt_type = ATOMISP_INPUT_FORMAT_RAW_10;
                }
                else if (stream_cfg->source.port.compression.uncompressed_bits_per_pixel ==
                        UNCOMPRESSED_BITS_PER_PIXEL_12) {
-                               fmt_type = IA_CSS_STREAM_FORMAT_RAW_12;
+                               fmt_type = ATOMISP_INPUT_FORMAT_RAW_12;
                }
                else
                        return false;
@@ -1082,7 +1082,7 @@ sh_css_config_input_network(struct ia_css_stream *stream)
 
        /* get the SP thread id */
        rc = ia_css_pipeline_get_sp_thread_id(ia_css_pipe_get_pipe_num(pipe), &sp_thread_id);
-       if (rc != true)
+       if (!rc)
                return IA_CSS_ERR_INTERNAL_ERROR;
        /* get the target input terminal */
        sp_pipeline_input_terminal = &(sh_css_sp_group.pipe_io[sp_thread_id].input);
@@ -1108,7 +1108,7 @@ sh_css_config_input_network(struct ia_css_stream *stream)
                                        &(isys_stream_descr));
                }
 
-               if (rc != true)
+               if (!rc)
                        return IA_CSS_ERR_INTERNAL_ERROR;
 
                isys_stream_id = ia_css_isys_generate_stream_id(sp_thread_id, i);
@@ -1118,7 +1118,7 @@ sh_css_config_input_network(struct ia_css_stream *stream)
                                &(isys_stream_descr),
                                &(sp_pipeline_input_terminal->context.virtual_input_system_stream[i]),
                                isys_stream_id);
-               if (rc != true)
+               if (!rc)
                        return IA_CSS_ERR_INTERNAL_ERROR;
 
                /* calculate the configuration of the virtual Input System (2401) */
@@ -1126,7 +1126,7 @@ sh_css_config_input_network(struct ia_css_stream *stream)
                                &(sp_pipeline_input_terminal->context.virtual_input_system_stream[i]),
                                &(isys_stream_descr),
                                &(sp_pipeline_input_terminal->ctrl.virtual_input_system_stream_cfg[i]));
-               if (rc != true) {
+               if (!rc) {
                        ia_css_isys_stream_destroy(&(sp_pipeline_input_terminal->context.virtual_input_system_stream[i]));
                        return IA_CSS_ERR_INTERNAL_ERROR;
                }
@@ -1195,7 +1195,7 @@ static inline struct ia_css_pipe *stream_get_target_pipe(
 
 static enum ia_css_err stream_csi_rx_helper(
        struct ia_css_stream *stream,
-       enum ia_css_err (*func)(enum ia_css_csi2_port, uint32_t))
+       enum ia_css_err (*func)(enum mipi_port_id, uint32_t))
 {
        enum ia_css_err retval = IA_CSS_ERR_INTERNAL_ERROR;
        uint32_t sp_thread_id, stream_id;
@@ -1391,7 +1391,7 @@ start_copy_on_sp(struct ia_css_pipe *pipe,
                ia_css_isys_rx_disable();
 #endif
 
-       if (pipe->stream->config.input_config.format != IA_CSS_STREAM_FORMAT_BINARY_8)
+       if (pipe->stream->config.input_config.format != ATOMISP_INPUT_FORMAT_BINARY_8)
                return IA_CSS_ERR_INTERNAL_ERROR;
        sh_css_sp_start_binary_copy(ia_css_pipe_get_pipe_num(pipe), out_frame, pipe->stream->config.pixels_per_clock == 2);
 
@@ -1454,7 +1454,7 @@ static void start_pipe(
                                &me->stream->info.metadata_info
 #if !defined(HAS_NO_INPUT_SYSTEM)
                                ,(input_mode==IA_CSS_INPUT_MODE_MEMORY) ?
-                                       (mipi_port_ID_t)0 :
+                                       (enum mipi_port_id)0 :
                                        me->stream->config.source.port.port
 #endif
 #ifdef ISP2401
@@ -1497,7 +1497,7 @@ static void
 enable_interrupts(enum ia_css_irq_type irq_type)
 {
 #ifdef USE_INPUT_SYSTEM_VERSION_2
-       mipi_port_ID_t port;
+       enum mipi_port_id port;
 #endif
        bool enable_pulse = irq_type != IA_CSS_IRQ_TYPE_EDGE;
        IA_CSS_ENTER_PRIVATE("");
@@ -2562,7 +2562,7 @@ ia_css_uninit(void)
        ifmtr_set_if_blocking_mode_reset = true;
 #endif
 
-       if (fw_explicitly_loaded == false) {
+       if (!fw_explicitly_loaded) {
                ia_css_unload_firmware();
        }
        ia_css_spctrl_unload_fw(SP0_ID);
@@ -4074,9 +4074,9 @@ preview_start(struct ia_css_pipe *pipe)
 #endif
 #if !defined(HAS_NO_INPUT_SYSTEM)
 #ifndef ISP2401
-                       , (mipi_port_ID_t)0
+                       , (enum mipi_port_id)0
 #else
-                       (mipi_port_ID_t)0,
+                       (enum mipi_port_id)0,
 #endif
 #endif
 #ifndef ISP2401
@@ -4106,9 +4106,9 @@ preview_start(struct ia_css_pipe *pipe)
 #endif
 #if !defined(HAS_NO_INPUT_SYSTEM)
 #ifndef ISP2401
-                       , (mipi_port_ID_t) 0
+                       , (enum mipi_port_id) 0
 #else
-                       (mipi_port_ID_t) 0,
+                       (enum mipi_port_id) 0,
 #endif
 #endif
 #ifndef ISP2401
@@ -4673,7 +4673,7 @@ ia_css_dequeue_psys_event(struct ia_css_event *event)
        event->type = convert_event_sp_to_host_domain[payload[0]];
        /* Some sane default values since not all events use all fields. */
        event->pipe = NULL;
-       event->port = IA_CSS_CSI2_PORT0;
+       event->port = MIPI_PORT0_ID;
        event->exp_id = 0;
        event->fw_warning = IA_CSS_FW_WARNING_NONE;
        event->fw_handle = 0;
@@ -4719,7 +4719,7 @@ ia_css_dequeue_psys_event(struct ia_css_event *event)
                }
        }
        if (event->type == IA_CSS_EVENT_TYPE_PORT_EOF) {
-               event->port = (enum ia_css_csi2_port)payload[1];
+               event->port = (enum mipi_port_id)payload[1];
                event->exp_id = payload[3];
        } else if (event->type == IA_CSS_EVENT_TYPE_FW_WARNING) {
                event->fw_warning = (enum ia_css_fw_warning)payload[1];
@@ -5949,9 +5949,9 @@ static enum ia_css_err video_start(struct ia_css_pipe *pipe)
 #endif
 #if !defined(HAS_NO_INPUT_SYSTEM)
 #ifndef ISP2401
-                       , (mipi_port_ID_t)0
+                       , (enum mipi_port_id)0
 #else
-                       (mipi_port_ID_t)0,
+                       (enum mipi_port_id)0,
 #endif
 #endif
 #ifndef ISP2401
@@ -6784,7 +6784,7 @@ static bool copy_on_sp(struct ia_css_pipe *pipe)
 
        rval &= (pipe->config.default_capture_config.mode == IA_CSS_CAPTURE_MODE_RAW);
 
-       rval &= ((pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_BINARY_8) ||
+       rval &= ((pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8) ||
                (pipe->config.mode == IA_CSS_PIPE_MODE_COPY));
 
        return rval;
@@ -6817,7 +6817,7 @@ static enum ia_css_err load_capture_binaries(
                return err;
        }
        if (copy_on_sp(pipe) &&
-           pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_BINARY_8) {
+           pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8) {
                ia_css_frame_info_init(
                        &pipe->output_info[0],
                        JPEG_BYTES,
@@ -6915,7 +6915,7 @@ need_yuv_scaler_stage(const struct ia_css_pipe *pipe)
 
        /* TODO: make generic function */
        need_format_conversion =
-               ((pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY) &&
+               ((pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY) &&
                (pipe->output_info[0].format != IA_CSS_FRAME_FORMAT_CSI_MIPI_LEGACY_YUV420_8));
 
        in_res = pipe->config.input_effective_res;
@@ -7304,7 +7304,7 @@ load_yuvpp_binaries(struct ia_css_pipe *pipe)
        /*
         * NOTES
         * - Why does the "yuvpp" pipe needs "isp_copy_binary" (i.e. ISP Copy) when
-        *   its input is "IA_CSS_STREAM_FORMAT_YUV422_8"?
+        *   its input is "ATOMISP_INPUT_FORMAT_YUV422_8"?
         *
         *   In most use cases, the first stage in the "yuvpp" pipe is the "yuv_scale_
         *   binary". However, the "yuv_scale_binary" does NOT support the input-frame
@@ -7319,7 +7319,7 @@ load_yuvpp_binaries(struct ia_css_pipe *pipe)
         *   "yuv_scale_binary".
         */
        need_isp_copy_binary =
-               (pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_YUV422_8);
+               (pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_YUV422_8);
 #else  /* !USE_INPUT_SYSTEM_VERSION_2401 */
        need_isp_copy_binary = true;
 #endif /*  USE_INPUT_SYSTEM_VERSION_2401 */
@@ -7627,11 +7627,11 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe)
                 * Bayer-Quad RAW.
                 */
                int in_frame_format;
-               if (pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY) {
+               if (pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY) {
                        in_frame_format = IA_CSS_FRAME_FORMAT_CSI_MIPI_LEGACY_YUV420_8;
-               } else if (pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_YUV422_8) {
+               } else if (pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_YUV422_8) {
                        /*
-                        * When the sensor output frame format is "IA_CSS_STREAM_FORMAT_YUV422_8",
+                        * When the sensor output frame format is "ATOMISP_INPUT_FORMAT_YUV422_8",
                         * the "isp_copy_var" binary is selected as the first stage in the yuvpp
                         * pipe.
                         *
@@ -7739,7 +7739,7 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe)
 
                for (i = 0, j = 0; i < num_stage; i++) {
                        assert(j < num_output_stage);
-                       if (pipe->pipe_settings.yuvpp.is_output_stage[i] == true) {
+                       if (pipe->pipe_settings.yuvpp.is_output_stage[i]) {
                                tmp_out_frame = out_frame[j];
                                tmp_vf_frame = vf_frame[j];
                        } else {
@@ -7758,7 +7758,7 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe)
                        }
                        /* we use output port 1 as internal output port */
                        tmp_in_frame = yuv_scaler_stage->args.out_frame[1];
-                       if (pipe->pipe_settings.yuvpp.is_output_stage[i] == true) {
+                       if (pipe->pipe_settings.yuvpp.is_output_stage[i]) {
                                if (tmp_vf_frame && (tmp_vf_frame->info.res.width != 0)) {
                                        in_frame = yuv_scaler_stage->args.out_vf_frame;
                                        err = add_vf_pp_stage(pipe, in_frame, tmp_vf_frame, &vf_pp_binary[j],
@@ -7812,7 +7812,7 @@ create_host_copy_pipeline(struct ia_css_pipe *pipe,
        out_frame->flash_state = IA_CSS_FRAME_FLASH_STATE_NONE;
 
        if (copy_on_sp(pipe) &&
-           pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_BINARY_8) {
+           pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8) {
                ia_css_frame_info_init(
                        &out_frame->info,
                        JPEG_BYTES,
@@ -8044,7 +8044,6 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe)
        }
 
        if (mode == IA_CSS_CAPTURE_MODE_PRIMARY) {
-               unsigned int frm;
                struct ia_css_frame *local_in_frame = NULL;
                struct ia_css_frame *local_out_frame = NULL;
 
@@ -8082,7 +8081,6 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe)
                                return err;
                        }
                }
-               (void)frm;
                /* If we use copy iso primary,
                   the input must be yuv iso raw */
                current_stage->args.copy_vf =
@@ -8321,8 +8319,6 @@ sh_css_pipe_get_output_frame_info(struct ia_css_pipe *pipe,
                                  struct ia_css_frame_info *info,
                                  unsigned int idx)
 {
-       enum ia_css_err err = IA_CSS_SUCCESS;
-
        assert(pipe != NULL);
        assert(info != NULL);
 
@@ -8331,7 +8327,7 @@ sh_css_pipe_get_output_frame_info(struct ia_css_pipe *pipe,
 
        *info = pipe->output_info[idx];
        if (copy_on_sp(pipe) &&
-           pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_BINARY_8) {
+           pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8) {
                ia_css_frame_info_init(
                        info,
                        JPEG_BYTES,
@@ -8347,7 +8343,7 @@ sh_css_pipe_get_output_frame_info(struct ia_css_pipe *pipe,
 
        ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE_PRIVATE,
                                                "sh_css_pipe_get_output_frame_info() leave:\n");
-       return err;
+       return IA_CSS_SUCCESS;
 }
 
 #if !defined(HAS_NO_INPUT_SYSTEM)
@@ -8392,7 +8388,7 @@ ia_css_stream_send_input_line(const struct ia_css_stream *stream,
 
 void
 ia_css_stream_send_input_embedded_line(const struct ia_css_stream *stream,
-               enum ia_css_stream_format format,
+               enum atomisp_input_format format,
                const unsigned short *data,
                unsigned int width)
 {
@@ -9176,7 +9172,7 @@ ia_css_stream_configure_rx(struct ia_css_stream *stream)
        else if (config->num_lanes != 0)
                return IA_CSS_ERR_INVALID_ARGUMENTS;
 
-       if (config->port > IA_CSS_CSI2_PORT2)
+       if (config->port > MIPI_PORT2_ID)
                return IA_CSS_ERR_INVALID_ARGUMENTS;
        stream->csi_rx_config.port =
                ia_css_isys_port_to_mipi_port(config->port);
@@ -9363,7 +9359,7 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 
 #if defined(USE_INPUT_SYSTEM_VERSION_2)
        /* We don't support metadata for JPEG stream, since they both use str2mem */
-       if (stream_config->input_config.format == IA_CSS_STREAM_FORMAT_BINARY_8 &&
+       if (stream_config->input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8 &&
            stream_config->metadata_config.resolution.height > 0) {
                err = IA_CSS_ERR_INVALID_ARGUMENTS;
                IA_CSS_LEAVE_ERR(err);
@@ -10142,7 +10138,7 @@ ia_css_temp_pipe_to_pipe_id(const struct ia_css_pipe *pipe, enum ia_css_pipe_id
        return IA_CSS_SUCCESS;
 }
 
-enum ia_css_stream_format
+enum atomisp_input_format
 ia_css_stream_get_format(const struct ia_css_stream *stream)
 {
        return stream->config.input_config.format;
@@ -10218,8 +10214,6 @@ ia_css_stream_get_3a_binary(const struct ia_css_stream *stream)
 enum ia_css_err
 ia_css_stream_set_output_padded_width(struct ia_css_stream *stream, unsigned int output_padded_width)
 {
-       enum ia_css_err err = IA_CSS_SUCCESS;
-
        struct ia_css_pipe *pipe;
 
        assert(stream != NULL);
@@ -10232,7 +10226,7 @@ ia_css_stream_set_output_padded_width(struct ia_css_stream *stream, unsigned int
        pipe->config.output_info[IA_CSS_PIPE_OUTPUT_STAGE_0].padded_width = output_padded_width;
        pipe->output_info[IA_CSS_PIPE_OUTPUT_STAGE_0].padded_width = output_padded_width;
 
-       return err;
+       return IA_CSS_SUCCESS;
 }
 
 static struct ia_css_binary *
@@ -10734,7 +10728,7 @@ ia_css_pipe_set_qos_ext_state(struct ia_css_pipe *pipe, uint32_t fw_handle, bool
                                (uint8_t) IA_CSS_PSYS_SW_EVENT_STAGE_ENABLE_DISABLE,
                                (uint8_t) thread_id,
                                (uint8_t) stage->stage_num,
-                               (enable == true) ? 1 : 0);
+                               enable ? 1 : 0);
                        if (err == IA_CSS_SUCCESS) {
                                if(enable)
                                        SH_CSS_QOS_STAGE_ENABLE(&(sh_css_sp_group.pipe[thread_id]),stage->stage_num);
@@ -11059,7 +11053,7 @@ static struct sh_css_hmm_buffer_record
 
        buffer_record = &hmm_buffer_record[0];
        for (i = 0; i < MAX_HMM_BUFFER_NUM; i++) {
-               if (buffer_record->in_use == false) {
+               if (!buffer_record->in_use) {
                        buffer_record->in_use = true;
                        buffer_record->type = type;
                        buffer_record->h_vbuf = h_vbuf;
@@ -11083,7 +11077,7 @@ static struct sh_css_hmm_buffer_record
 
        buffer_record = &hmm_buffer_record[0];
        for (i = 0; i < MAX_HMM_BUFFER_NUM; i++) {
-               if ((buffer_record->in_use == true) &&
+               if ((buffer_record->in_use) &&
                    (buffer_record->type == type) &&
                    (buffer_record->h_vbuf != NULL) &&
                    (buffer_record->h_vbuf->vptr == ddr_buffer_addr)) {
@@ -11093,7 +11087,7 @@ static struct sh_css_hmm_buffer_record
                buffer_record++;
        }
 
-       if (found_record == true)
+       if (found_record)
                return buffer_record;
        else
                return NULL;
index 883474e90c8173912b9800a79f34a9d257ae50a5..a6a00024bae842427315ef596ea1772bedd6ba65 100644 (file)
@@ -104,7 +104,7 @@ static bool ia_css_mipi_is_source_port_valid(struct ia_css_pipe *pipe,
 enum ia_css_err
 ia_css_mipi_frame_calculate_size(const unsigned int width,
                                const unsigned int height,
-                               const enum ia_css_stream_format format,
+                               const enum atomisp_input_format format,
                                const bool hasSOLandEOL,
                                const unsigned int embedded_data_size_words,
                                unsigned int *size_mem_words)
@@ -136,16 +136,16 @@ ia_css_mipi_frame_calculate_size(const unsigned int width,
                     width_padded, height, format, hasSOLandEOL, embedded_data_size_words);
 
        switch (format) {
-       case IA_CSS_STREAM_FORMAT_RAW_6:                /* 4p, 3B, 24bits */
+       case ATOMISP_INPUT_FORMAT_RAW_6:                /* 4p, 3B, 24bits */
                bits_per_pixel = 6;     break;
-       case IA_CSS_STREAM_FORMAT_RAW_7:                /* 8p, 7B, 56bits */
+       case ATOMISP_INPUT_FORMAT_RAW_7:                /* 8p, 7B, 56bits */
                bits_per_pixel = 7;             break;
-       case IA_CSS_STREAM_FORMAT_RAW_8:                /* 1p, 1B, 8bits */
-       case IA_CSS_STREAM_FORMAT_BINARY_8:             /*  8bits, TODO: check. */
-       case IA_CSS_STREAM_FORMAT_YUV420_8:             /* odd 2p, 2B, 16bits, even 2p, 4B, 32bits */
+       case ATOMISP_INPUT_FORMAT_RAW_8:                /* 1p, 1B, 8bits */
+       case ATOMISP_INPUT_FORMAT_BINARY_8:             /*  8bits, TODO: check. */
+       case ATOMISP_INPUT_FORMAT_YUV420_8:             /* odd 2p, 2B, 16bits, even 2p, 4B, 32bits */
                bits_per_pixel = 8;             break;
-       case IA_CSS_STREAM_FORMAT_YUV420_10:            /* odd 4p, 5B, 40bits, even 4p, 10B, 80bits */
-       case IA_CSS_STREAM_FORMAT_RAW_10:               /* 4p, 5B, 40bits */
+       case ATOMISP_INPUT_FORMAT_YUV420_10:            /* odd 4p, 5B, 40bits, even 4p, 10B, 80bits */
+       case ATOMISP_INPUT_FORMAT_RAW_10:               /* 4p, 5B, 40bits */
 #if !defined(HAS_NO_PACKED_RAW_PIXELS)
                /* The changes will be reverted as soon as RAW
                 * Buffers are deployed by the 2401 Input System
@@ -156,26 +156,26 @@ ia_css_mipi_frame_calculate_size(const unsigned int width,
                bits_per_pixel = 16;
 #endif
                break;
-       case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:      /* 2p, 3B, 24bits */
-       case IA_CSS_STREAM_FORMAT_RAW_12:               /* 2p, 3B, 24bits */
+       case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:      /* 2p, 3B, 24bits */
+       case ATOMISP_INPUT_FORMAT_RAW_12:               /* 2p, 3B, 24bits */
                bits_per_pixel = 12;    break;
-       case IA_CSS_STREAM_FORMAT_RAW_14:               /* 4p, 7B, 56bits */
+       case ATOMISP_INPUT_FORMAT_RAW_14:               /* 4p, 7B, 56bits */
                bits_per_pixel = 14;    break;
-       case IA_CSS_STREAM_FORMAT_RGB_444:              /* 1p, 2B, 16bits */
-       case IA_CSS_STREAM_FORMAT_RGB_555:              /* 1p, 2B, 16bits */
-       case IA_CSS_STREAM_FORMAT_RGB_565:              /* 1p, 2B, 16bits */
-       case IA_CSS_STREAM_FORMAT_YUV422_8:             /* 2p, 4B, 32bits */
+       case ATOMISP_INPUT_FORMAT_RGB_444:              /* 1p, 2B, 16bits */
+       case ATOMISP_INPUT_FORMAT_RGB_555:              /* 1p, 2B, 16bits */
+       case ATOMISP_INPUT_FORMAT_RGB_565:              /* 1p, 2B, 16bits */
+       case ATOMISP_INPUT_FORMAT_YUV422_8:             /* 2p, 4B, 32bits */
                bits_per_pixel = 16;    break;
-       case IA_CSS_STREAM_FORMAT_RGB_666:              /* 4p, 9B, 72bits */
+       case ATOMISP_INPUT_FORMAT_RGB_666:              /* 4p, 9B, 72bits */
                bits_per_pixel = 18;    break;
-       case IA_CSS_STREAM_FORMAT_YUV422_10:            /* 2p, 5B, 40bits */
+       case ATOMISP_INPUT_FORMAT_YUV422_10:            /* 2p, 5B, 40bits */
                bits_per_pixel = 20;    break;
-       case IA_CSS_STREAM_FORMAT_RGB_888:              /* 1p, 3B, 24bits */
+       case ATOMISP_INPUT_FORMAT_RGB_888:              /* 1p, 3B, 24bits */
                bits_per_pixel = 24;    break;
 
-       case IA_CSS_STREAM_FORMAT_YUV420_16:            /* Not supported */
-       case IA_CSS_STREAM_FORMAT_YUV422_16:            /* Not supported */
-       case IA_CSS_STREAM_FORMAT_RAW_16:               /* TODO: not specified in MIPI SPEC, check */
+       case ATOMISP_INPUT_FORMAT_YUV420_16:            /* Not supported */
+       case ATOMISP_INPUT_FORMAT_YUV422_16:            /* Not supported */
+       case ATOMISP_INPUT_FORMAT_RAW_16:               /* TODO: not specified in MIPI SPEC, check */
        default:
                return IA_CSS_ERR_INVALID_ARGUMENTS;
        }
@@ -183,9 +183,9 @@ ia_css_mipi_frame_calculate_size(const unsigned int width,
        odd_line_bytes = (width_padded * bits_per_pixel + 7) >> 3; /* ceil ( bits per line / 8) */
 
        /* Even lines for YUV420 formats are double in bits_per_pixel. */
-       if (format == IA_CSS_STREAM_FORMAT_YUV420_8
-                       || format == IA_CSS_STREAM_FORMAT_YUV420_10
-                       || format == IA_CSS_STREAM_FORMAT_YUV420_16) {
+       if (format == ATOMISP_INPUT_FORMAT_YUV420_8
+                       || format == ATOMISP_INPUT_FORMAT_YUV420_10
+                       || format == ATOMISP_INPUT_FORMAT_YUV420_16) {
                even_line_bytes = (width_padded * 2 * bits_per_pixel + 7) >> 3; /* ceil ( bits per line / 8) */
        } else {
                even_line_bytes = odd_line_bytes;
@@ -239,7 +239,7 @@ ia_css_mipi_frame_calculate_size(const unsigned int width,
 
 #if !defined(HAS_NO_INPUT_SYSTEM) && defined(USE_INPUT_SYSTEM_VERSION_2)
 enum ia_css_err
-ia_css_mipi_frame_enable_check_on_size(const enum ia_css_csi2_port port,
+ia_css_mipi_frame_enable_check_on_size(const enum mipi_port_id port,
                                const unsigned int      size_mem_words)
 {
        uint32_t idx;
@@ -285,7 +285,7 @@ calculate_mipi_buff_size(
 #else
        unsigned int width;
        unsigned int height;
-       enum ia_css_stream_format format;
+       enum atomisp_input_format format;
        bool pack_raw_pixels;
 
        unsigned int width_padded;
@@ -348,15 +348,15 @@ calculate_mipi_buff_size(
 
        bits_per_pixel = sh_css_stream_format_2_bits_per_subpixel(format);
        bits_per_pixel =
-               (format == IA_CSS_STREAM_FORMAT_RAW_10 && pack_raw_pixels) ? bits_per_pixel : 16;
+               (format == ATOMISP_INPUT_FORMAT_RAW_10 && pack_raw_pixels) ? bits_per_pixel : 16;
        if (bits_per_pixel == 0)
                return IA_CSS_ERR_INTERNAL_ERROR;
 
        odd_line_bytes = (width_padded * bits_per_pixel + 7) >> 3; /* ceil ( bits per line / 8) */
 
        /* Even lines for YUV420 formats are double in bits_per_pixel. */
-       if (format == IA_CSS_STREAM_FORMAT_YUV420_8
-               || format == IA_CSS_STREAM_FORMAT_YUV420_10) {
+       if (format == ATOMISP_INPUT_FORMAT_YUV420_8
+               || format == ATOMISP_INPUT_FORMAT_YUV420_10) {
                even_line_bytes = (width_padded * 2 * bits_per_pixel + 7) >> 3; /* ceil ( bits per line / 8) */
        } else {
                even_line_bytes = odd_line_bytes;
index fbb36112fe3c84417a1be682e8678f20ee7b94b5..43529b1605c3a80496bea282657bbfd2f6bcc762 100644 (file)
 #define FPNTBL_BYTES(binary) \
        (sizeof(char) * (binary)->in_frame_info.res.height * \
         (binary)->in_frame_info.padded_width)
-        
+
 #ifndef ISP2401
 
 #define SCTBL_BYTES(binary) \
@@ -1741,7 +1741,7 @@ ia_css_process_zoom_and_motion(
                                out_infos[0] = &args->out_frame[0]->info;
                        info = &stage->firmware->info.isp;
                        ia_css_binary_fill_info(info, false, false,
-                               IA_CSS_STREAM_FORMAT_RAW_10,
+                               ATOMISP_INPUT_FORMAT_RAW_10,
                                args->in_frame  ? &args->in_frame->info  : NULL,
                                NULL,
                                out_infos,
@@ -2891,8 +2891,8 @@ ia_css_metadata_free_multiple(unsigned int num_bufs, struct ia_css_metadata **bu
        }
 }
 
-unsigned g_param_buffer_dequeue_count = 0;
-unsigned g_param_buffer_enqueue_count = 0;
+static unsigned g_param_buffer_dequeue_count = 0;
+static unsigned g_param_buffer_enqueue_count = 0;
 
 enum ia_css_err
 ia_css_stream_isp_parameters_init(struct ia_css_stream *stream)
@@ -3805,7 +3805,6 @@ sh_css_param_update_isp_params(struct ia_css_pipe *curr_pipe,
 
                enum sh_css_queue_id queue_id;
 
-               (void)stage;
                pipe = curr_pipe->stream->pipes[i];
                pipeline = ia_css_pipe_get_pipeline(pipe);
                pipe_num = ia_css_pipe_get_pipe_num(pipe);
index 6fc00fc402b14991e0b3b267abc9320ca7dbd513..85263725540d4f7a5ecb64421df390218dff7179 100644 (file)
@@ -71,7 +71,7 @@
 struct sh_css_sp_group         sh_css_sp_group;
 struct sh_css_sp_stage         sh_css_sp_stage;
 struct sh_css_isp_stage                sh_css_isp_stage;
-struct sh_css_sp_output                sh_css_sp_output;
+static struct sh_css_sp_output         sh_css_sp_output;
 static struct sh_css_sp_per_frame_data per_frame_data;
 
 /* true if SP supports frame loop and host2sp_commands */
@@ -117,9 +117,9 @@ copy_isp_stage_to_sp_stage(void)
        */
        sh_css_sp_stage.enable.sdis = sh_css_isp_stage.binary_info.enable.dis;
        sh_css_sp_stage.enable.s3a = sh_css_isp_stage.binary_info.enable.s3a;
-#ifdef ISP2401 
+#ifdef ISP2401
        sh_css_sp_stage.enable.lace_stats = sh_css_isp_stage.binary_info.enable.lace_stats;
-#endif 
+#endif
 }
 
 void
@@ -754,7 +754,7 @@ sh_css_sp_write_frame_pointers(const struct sh_css_binary_args *args)
 
 static void
 sh_css_sp_init_group(bool two_ppc,
-                    enum ia_css_stream_format input_format,
+                    enum atomisp_input_format input_format,
                     bool no_isp_sync,
                     uint8_t if_config_index)
 {
@@ -817,7 +817,6 @@ configure_isp_from_args(
        bool two_ppc,
        bool deinterleaved)
 {
-       enum ia_css_err err = IA_CSS_SUCCESS;
 #ifdef ISP2401
        struct ia_css_pipe *pipe = find_pipe_by_num(pipeline->pipe_num);
        const struct ia_css_resolution *res;
@@ -841,7 +840,7 @@ configure_isp_from_args(
        ia_css_ref_configure(binary, (const struct ia_css_frame **)args->delay_frames, pipeline->dvs_frame_delay);
        ia_css_tnr_configure(binary, (const struct ia_css_frame **)args->tnr_frames);
        ia_css_bayer_io_config(binary, args);
-       return err;
+       return IA_CSS_SUCCESS;
 }
 
 static void
@@ -1118,7 +1117,7 @@ sp_init_stage(struct ia_css_pipeline_stage *stage,
                        out_infos[0] = &args->out_frame[0]->info;
                info = &firmware->info.isp;
                ia_css_binary_fill_info(info, false, false,
-                           IA_CSS_STREAM_FORMAT_RAW_10,
+                           ATOMISP_INPUT_FORMAT_RAW_10,
                            args->in_frame  ? &args->in_frame->info  : NULL,
                            NULL,
                                out_infos,
@@ -1197,7 +1196,7 @@ sh_css_sp_init_pipeline(struct ia_css_pipeline *me,
                        const struct ia_css_metadata_config *md_config,
                        const struct ia_css_metadata_info *md_info,
 #if !defined(HAS_NO_INPUT_SYSTEM)
-                       const mipi_port_ID_t port_id
+                       const enum mipi_port_id port_id
 #endif
 #ifdef ISP2401
                        ,
@@ -1442,8 +1441,6 @@ sh_css_update_host2sp_offline_frame(
        unsigned int HIVE_ADDR_host_sp_com;
        unsigned int offset;
 
-       (void)HIVE_ADDR_host_sp_com; /* Suppres warnings in CRUN */
-
        assert(frame_num < NUM_CONTINUOUS_FRAMES);
 
        /* Write new frame data into SP DMEM */
@@ -1473,8 +1470,6 @@ sh_css_update_host2sp_mipi_frame(
        unsigned int HIVE_ADDR_host_sp_com;
        unsigned int offset;
 
-       (void)HIVE_ADDR_host_sp_com; /* Suppres warnings in CRUN */
-
        /* MIPI buffers are dedicated to port, so now there are more of them. */
        assert(frame_num < (N_CSI_PORTS * NUM_MIPI_FRAMES_PER_STREAM));
 
@@ -1500,8 +1495,6 @@ sh_css_update_host2sp_mipi_metadata(
        unsigned int HIVE_ADDR_host_sp_com;
        unsigned int o;
 
-       (void)HIVE_ADDR_host_sp_com; /* Suppres warnings in CRUN */
-
        /* MIPI buffers are dedicated to port, so now there are more of them. */
        assert(frame_num < (N_CSI_PORTS * NUM_MIPI_FRAMES_PER_STREAM));
 
@@ -1520,8 +1513,6 @@ sh_css_update_host2sp_num_mipi_frames(unsigned num_frames)
        unsigned int HIVE_ADDR_host_sp_com;
        unsigned int offset;
 
-       (void)HIVE_ADDR_host_sp_com; /* Suppres warnings in CRUN */
-
        /* Write new frame data into SP DMEM */
        HIVE_ADDR_host_sp_com = sh_css_sp_fw.info.sp.host_sp_com;
        offset = (unsigned int)offsetof(struct host_sp_communication, host2sp_num_mipi_frames)
@@ -1539,8 +1530,6 @@ sh_css_update_host2sp_cont_num_raw_frames(unsigned num_frames, bool set_avail)
        unsigned int extra_num_frames, avail_num_frames;
        unsigned int offset, offset_extra;
 
-       (void)HIVE_ADDR_host_sp_com; /* Suppres warnings in CRUN */
-
        /* Write new frame data into SP DMEM */
        fw = &sh_css_sp_fw;
        HIVE_ADDR_host_sp_com = fw->info.sp.host_sp_com;
index 98444a3cc3e47595097bfd6ea5b257a35b821fad..3c41e997de7918edd2338058d12377134a80d550 100644 (file)
@@ -64,7 +64,7 @@ sh_css_sp_init_pipeline(struct ia_css_pipeline *me,
                        const struct ia_css_metadata_config *md_config,
                        const struct ia_css_metadata_info *md_info,
 #if !defined(HAS_NO_INPUT_SYSTEM)
-                       const mipi_port_ID_t port_id
+                       const enum mipi_port_id port_id
 #endif
 #ifdef ISP2401
                        ,
index 52d0a647159784900da27fcc61bc5c779de6d3d4..77f135e7dc3c2786e3156c8ebbd679cc103bfca3 100644 (file)
 #include <ia_css_stream_format.h>
 
 unsigned int sh_css_stream_format_2_bits_per_subpixel(
-               enum ia_css_stream_format format)
+               enum atomisp_input_format format)
 {
        unsigned int rval;
 
        switch (format) {
-       case IA_CSS_STREAM_FORMAT_RGB_444:
+       case ATOMISP_INPUT_FORMAT_RGB_444:
                rval = 4;
                break;
-       case IA_CSS_STREAM_FORMAT_RGB_555:
+       case ATOMISP_INPUT_FORMAT_RGB_555:
                rval = 5;
                break;
-       case IA_CSS_STREAM_FORMAT_RGB_565:
-       case IA_CSS_STREAM_FORMAT_RGB_666:
-       case IA_CSS_STREAM_FORMAT_RAW_6:
+       case ATOMISP_INPUT_FORMAT_RGB_565:
+       case ATOMISP_INPUT_FORMAT_RGB_666:
+       case ATOMISP_INPUT_FORMAT_RAW_6:
                rval = 6;
                break;
-       case IA_CSS_STREAM_FORMAT_RAW_7:
+       case ATOMISP_INPUT_FORMAT_RAW_7:
                rval = 7;
                break;
-       case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
-       case IA_CSS_STREAM_FORMAT_YUV420_8:
-       case IA_CSS_STREAM_FORMAT_YUV422_8:
-       case IA_CSS_STREAM_FORMAT_RGB_888:
-       case IA_CSS_STREAM_FORMAT_RAW_8:
-       case IA_CSS_STREAM_FORMAT_BINARY_8:
-       case IA_CSS_STREAM_FORMAT_USER_DEF1:
-       case IA_CSS_STREAM_FORMAT_USER_DEF2:
-       case IA_CSS_STREAM_FORMAT_USER_DEF3:
-       case IA_CSS_STREAM_FORMAT_USER_DEF4:
-       case IA_CSS_STREAM_FORMAT_USER_DEF5:
-       case IA_CSS_STREAM_FORMAT_USER_DEF6:
-       case IA_CSS_STREAM_FORMAT_USER_DEF7:
-       case IA_CSS_STREAM_FORMAT_USER_DEF8:
+       case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
+       case ATOMISP_INPUT_FORMAT_YUV420_8:
+       case ATOMISP_INPUT_FORMAT_YUV422_8:
+       case ATOMISP_INPUT_FORMAT_RGB_888:
+       case ATOMISP_INPUT_FORMAT_RAW_8:
+       case ATOMISP_INPUT_FORMAT_BINARY_8:
+       case ATOMISP_INPUT_FORMAT_USER_DEF1:
+       case ATOMISP_INPUT_FORMAT_USER_DEF2:
+       case ATOMISP_INPUT_FORMAT_USER_DEF3:
+       case ATOMISP_INPUT_FORMAT_USER_DEF4:
+       case ATOMISP_INPUT_FORMAT_USER_DEF5:
+       case ATOMISP_INPUT_FORMAT_USER_DEF6:
+       case ATOMISP_INPUT_FORMAT_USER_DEF7:
+       case ATOMISP_INPUT_FORMAT_USER_DEF8:
                rval = 8;
                break;
-       case IA_CSS_STREAM_FORMAT_YUV420_10:
-       case IA_CSS_STREAM_FORMAT_YUV422_10:
-       case IA_CSS_STREAM_FORMAT_RAW_10:
+       case ATOMISP_INPUT_FORMAT_YUV420_10:
+       case ATOMISP_INPUT_FORMAT_YUV422_10:
+       case ATOMISP_INPUT_FORMAT_RAW_10:
                rval = 10;
                break;
-       case IA_CSS_STREAM_FORMAT_RAW_12:
+       case ATOMISP_INPUT_FORMAT_RAW_12:
                rval = 12;
                break;
-       case IA_CSS_STREAM_FORMAT_RAW_14:
+       case ATOMISP_INPUT_FORMAT_RAW_14:
                rval = 14;
                break;
-       case IA_CSS_STREAM_FORMAT_RAW_16:
-       case IA_CSS_STREAM_FORMAT_YUV420_16:
-       case IA_CSS_STREAM_FORMAT_YUV422_16:
+       case ATOMISP_INPUT_FORMAT_RAW_16:
+       case ATOMISP_INPUT_FORMAT_YUV420_16:
+       case ATOMISP_INPUT_FORMAT_YUV422_16:
                rval = 16;
                break;
        default:
index aab2b6207051ec04c1fbd257e3c43a001ea61703..b699f538e0ddb7e97c00a372970df527645ce54b 100644 (file)
@@ -18,6 +18,6 @@
 #include <ia_css_stream_format.h>
 
 unsigned int sh_css_stream_format_2_bits_per_subpixel(
-               enum ia_css_stream_format format);
+               enum atomisp_input_format format);
 
 #endif /* __SH_CSS_STREAM_FORMAT_H */
index 560014add0057ad8159765f14d94a9538a98710b..4b2d94a37ea1992c6958364c5f43fd53f49bcbba 100644 (file)
@@ -80,12 +80,10 @@ struct isp_mmu_client {
        unsigned int null_pte;
 
        /*
-        * set/get page directory base address (physical address).
+        * get page directory base address (physical address).
         *
         * must be provided.
         */
-       int (*set_pd_base) (struct isp_mmu *mmu,
-                       phys_addr_t pd_base);
        unsigned int (*get_pd_base) (struct isp_mmu *mmu, phys_addr_t pd_base);
        /*
         * callback to flush tlb.
index f21075c1e5032036f9d79d970672941dcbcad290..198f29f4a32465349a4c97a286bbac5258cb7265 100644 (file)
@@ -344,13 +344,6 @@ static int mmu_map(struct isp_mmu *mmu, unsigned int isp_virt,
                /*
                 * setup L1 page table physical addr to MMU
                 */
-               ret = mmu->driver->set_pd_base(mmu, l1_pt);
-               if (ret) {
-                       dev_err(atomisp_dev,
-                                "set page directory base address fail.\n");
-                       mutex_unlock(&mmu->pt_mutex);
-                       return ret;
-               }
                mmu->base_address = l1_pt;
                mmu->l1_pte = isp_pgaddr_to_pte_valid(mmu, l1_pt);
                memset(mmu->l2_pgt_refcount, 0, sizeof(int) * ISP_L1PT_PTES);
@@ -531,10 +524,8 @@ int isp_mmu_init(struct isp_mmu *mmu, struct isp_mmu_client *driver)
 
        mmu->driver = driver;
 
-       if (!driver->set_pd_base || !driver->tlb_flush_all) {
-               dev_err(atomisp_dev,
-                           "set_pd_base or tlb_flush_all operation "
-                            "not provided.\n");
+       if (!driver->tlb_flush_all) {
+               dev_err(atomisp_dev, "tlb_flush_all operation not provided.\n");
                return -EINVAL;
        }
 
index c59bcc9829667828734a2d599175180808d1c2f1..c0212564b7c8295db9e0a3066179b0095f31784b 100644 (file)
@@ -18,6 +18,7 @@
  */
 #include "type_support.h"
 #include "mmu/isp_mmu.h"
+#include "mmu/sh_mmu_mrfld.h"
 #include "memory_access/memory_access.h"
 #include "atomisp_compat.h"
 
@@ -40,20 +41,6 @@ static phys_addr_t sh_pte_to_phys(struct isp_mmu *mmu,
        return (phys_addr_t)((pte & ~mask) << ISP_PAGE_OFFSET);
 }
 
-/*
- * set page directory base address (physical address).
- *
- * must be provided.
- */
-static int sh_set_pd_base(struct isp_mmu *mmu,
-                         phys_addr_t phys)
-{
-       unsigned int pte = sh_phys_to_pte(mmu, phys);
-       /*mmgr_set_base_address(HOST_ADDRESS(pte));*/
-       atomisp_css_mmu_set_page_table_base_index(HOST_ADDRESS(pte));
-       return 0;
-}
-
 static unsigned int sh_get_pd_base(struct isp_mmu *mmu,
                                   phys_addr_t phys)
 {
@@ -81,7 +68,6 @@ struct isp_mmu_client sh_mmu_mrfld = {
        .name = "Silicon Hive ISP3000 MMU",
        .pte_valid_mask = MERR_VALID_PTE_MASK,
        .null_pte = ~MERR_VALID_PTE_MASK,
-       .set_pd_base = sh_set_pd_base,
        .get_pd_base = sh_get_pd_base,
        .tlb_flush_all = sh_tlb_flush,
        .phys_to_pte = sh_phys_to_pte,
index d8b7183db252ae0a25951effc534d14717c83074..3283c1b05d6ab0bd7f771ab12314117d89377fd0 100644 (file)
@@ -441,7 +441,7 @@ static int gmin_v1p2_ctrl(struct v4l2_subdev *subdev, int on)
 {
        struct gmin_subdev *gs = find_gmin_subdev(subdev);
 
-       if (gs && gs->v1p2_on == on)
+       if (!gs || gs->v1p2_on == on)
                return 0;
        gs->v1p2_on = on;
 
@@ -475,7 +475,7 @@ static int gmin_v1p8_ctrl(struct v4l2_subdev *subdev, int on)
                }
        }
 
-       if (gs && gs->v1p8_on == on)
+       if (!gs || gs->v1p8_on == on)
                return 0;
        gs->v1p8_on = on;
 
@@ -511,7 +511,7 @@ static int gmin_v2p8_ctrl(struct v4l2_subdev *subdev, int on)
                }
        }
 
-       if (gs && gs->v2p8_on == on)
+       if (!gs || gs->v2p8_on == on)
                return 0;
        gs->v2p8_on = on;
 
@@ -693,9 +693,11 @@ static int gmin_get_config_var(struct device *dev, const char *var,
        for (i = 0; i < sizeof(var8) && var8[i]; i++)
                var16[i] = var8[i];
 
+#ifdef CONFIG_64BIT
        /* To avoid owerflows when calling the efivar API */
        if (*out_len > ULONG_MAX)
                return -EINVAL;
+#endif
 
        /* Not sure this API usage is kosher; efivar_entry_get()'s
         * implementation simply uses VariableName and VendorGuid from
index 857b0e847c5e4bdd42b8d31482e5fef81ad3ef39..1ee216d71d423781f59d18b59c0a015d4a9954a2 100644 (file)
@@ -480,7 +480,7 @@ resizer_configure_common_in_params(struct vpfe_resizer_device *resizer)
        return 0;
 }
 static int
-resizer_configure_in_continious_mode(struct vpfe_resizer_device *resizer)
+resizer_configure_in_continuous_mode(struct vpfe_resizer_device *resizer)
 {
        struct device *dev = resizer->crop_resizer.subdev.v4l2_dev->dev;
        struct resizer_params *param = &resizer->config;
@@ -1242,7 +1242,7 @@ static int resizer_do_hw_setup(struct vpfe_resizer_device *resizer)
                    ipipeif_source == IPIPEIF_OUTPUT_RESIZER)
                        ret = resizer_configure_in_single_shot_mode(resizer);
                else
-                       ret =  resizer_configure_in_continious_mode(resizer);
+                       ret =  resizer_configure_in_continuous_mode(resizer);
                if (ret)
                        return ret;
                ret = config_rsz_hw(resizer, param);
index 1aa2be891704e99f40f65056bbf33246f869015d..16cab40156ca6c3f5c779ece59ac9e7a4f1906e0 100644 (file)
@@ -1005,7 +1005,7 @@ static int csi_link_validate(struct v4l2_subdev *sd,
                             struct v4l2_subdev_format *sink_fmt)
 {
        struct csi_priv *priv = v4l2_get_subdevdata(sd);
-       struct v4l2_fwnode_endpoint upstream_ep;
+       struct v4l2_fwnode_endpoint upstream_ep = {};
        const struct imx_media_pixfmt *incc;
        bool is_csi2;
        int ret;
@@ -1800,7 +1800,10 @@ static int imx_csi_probe(struct platform_device *pdev)
        pinctrl = devm_pinctrl_get_select_default(priv->dev);
        if (IS_ERR(pinctrl)) {
                ret = PTR_ERR(priv->vdev);
-               goto free;
+               dev_dbg(priv->dev,
+                       "devm_pinctrl_get_select_default() failed: %d\n", ret);
+               if (ret != -ENODEV)
+                       goto free;
        }
 
        ret = v4l2_async_register_subdev(&priv->sd);
index edc6fec9ad8400f03d5984f3344359aa15935f09..986058a57917f5b6ce0394d83206c08e161f4bd9 100644 (file)
@@ -44,6 +44,10 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
  * Using this limit prevents one virtqueue from starving others. */
 #define VHOST_NET_WEIGHT 0x80000
 
+/* Max number of packets transferred before requeueing the job.
+ * Using this limit prevents one virtqueue from starving rx. */
+#define VHOST_NET_PKT_WEIGHT(vq) ((vq)->num * 2)
+
 /* MAX number of TX used buffers for outstanding zerocopy */
 #define VHOST_MAX_PEND 128
 #define VHOST_GOODCOPY_LEN 256
@@ -473,6 +477,7 @@ static void handle_tx(struct vhost_net *net)
        struct socket *sock;
        struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
        bool zcopy, zcopy_used;
+       int sent_pkts = 0;
 
        mutex_lock(&vq->mutex);
        sock = vq->private_data;
@@ -580,7 +585,8 @@ static void handle_tx(struct vhost_net *net)
                else
                        vhost_zerocopy_signal_used(net, vq);
                vhost_net_tx_packet(net);
-               if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
+               if (unlikely(total_len >= VHOST_NET_WEIGHT) ||
+                   unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT(vq))) {
                        vhost_poll_queue(&vq->poll);
                        break;
                }
index bec722e41f5831be0890a1f4c02f1e486782efef..f3bd8e9412245919ba3a73fb9d94677c280d1427 100644 (file)
@@ -641,14 +641,14 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 }
 EXPORT_SYMBOL_GPL(vhost_dev_cleanup);
 
-static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
+static bool log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
 {
        u64 a = addr / VHOST_PAGE_SIZE / 8;
 
        /* Make sure 64 bit math will not overflow. */
        if (a > ULONG_MAX - (unsigned long)log_base ||
            a + (unsigned long)log_base > ULONG_MAX)
-               return 0;
+               return false;
 
        return access_ok(VERIFY_WRITE, log_base + a,
                         (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8);
@@ -661,30 +661,30 @@ static bool vhost_overflow(u64 uaddr, u64 size)
 }
 
 /* Caller should have vq mutex and device mutex. */
-static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem,
-                              int log_all)
+static bool vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem,
+                               int log_all)
 {
        struct vhost_umem_node *node;
 
        if (!umem)
-               return 0;
+               return false;
 
        list_for_each_entry(node, &umem->umem_list, link) {
                unsigned long a = node->userspace_addr;
 
                if (vhost_overflow(node->userspace_addr, node->size))
-                       return 0;
+                       return false;
 
 
                if (!access_ok(VERIFY_WRITE, (void __user *)a,
                                    node->size))
-                       return 0;
+                       return false;
                else if (log_all && !log_access_ok(log_base,
                                                   node->start,
                                                   node->size))
-                       return 0;
+                       return false;
        }
-       return 1;
+       return true;
 }
 
 static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
@@ -701,13 +701,13 @@ static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
 
 /* Can we switch to this memory table? */
 /* Caller should have device mutex but not vq mutex */
-static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
-                           int log_all)
+static bool memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
+                            int log_all)
 {
        int i;
 
        for (i = 0; i < d->nvqs; ++i) {
-               int ok;
+               bool ok;
                bool log;
 
                mutex_lock(&d->vqs[i]->mutex);
@@ -717,12 +717,12 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
                        ok = vq_memory_access_ok(d->vqs[i]->log_base,
                                                 umem, log);
                else
-                       ok = 1;
+                       ok = true;
                mutex_unlock(&d->vqs[i]->mutex);
                if (!ok)
-                       return 0;
+                       return false;
        }
-       return 1;
+       return true;
 }
 
 static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
@@ -744,7 +744,7 @@ static int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to,
                struct iov_iter t;
                void __user *uaddr = vhost_vq_meta_fetch(vq,
                                     (u64)(uintptr_t)to, size,
-                                    VHOST_ADDR_DESC);
+                                    VHOST_ADDR_USED);
 
                if (uaddr)
                        return __copy_to_user(uaddr, from, size);
@@ -959,21 +959,21 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d,
        spin_unlock(&d->iotlb_lock);
 }
 
-static int umem_access_ok(u64 uaddr, u64 size, int access)
+static bool umem_access_ok(u64 uaddr, u64 size, int access)
 {
        unsigned long a = uaddr;
 
        /* Make sure 64 bit math will not overflow. */
        if (vhost_overflow(uaddr, size))
-               return -EFAULT;
+               return false;
 
        if ((access & VHOST_ACCESS_RO) &&
            !access_ok(VERIFY_READ, (void __user *)a, size))
-               return -EFAULT;
+               return false;
        if ((access & VHOST_ACCESS_WO) &&
            !access_ok(VERIFY_WRITE, (void __user *)a, size))
-               return -EFAULT;
-       return 0;
+               return false;
+       return true;
 }
 
 static int vhost_process_iotlb_msg(struct vhost_dev *dev,
@@ -988,7 +988,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
                        ret = -EFAULT;
                        break;
                }
-               if (umem_access_ok(msg->uaddr, msg->size, msg->perm)) {
+               if (!umem_access_ok(msg->uaddr, msg->size, msg->perm)) {
                        ret = -EFAULT;
                        break;
                }
@@ -1135,10 +1135,10 @@ static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access)
        return 0;
 }
 
-static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
-                       struct vring_desc __user *desc,
-                       struct vring_avail __user *avail,
-                       struct vring_used __user *used)
+static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
+                        struct vring_desc __user *desc,
+                        struct vring_avail __user *avail,
+                        struct vring_used __user *used)
 
 {
        size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
@@ -1161,8 +1161,8 @@ static void vhost_vq_meta_update(struct vhost_virtqueue *vq,
                vq->meta_iotlb[type] = node;
 }
 
-static int iotlb_access_ok(struct vhost_virtqueue *vq,
-                          int access, u64 addr, u64 len, int type)
+static bool iotlb_access_ok(struct vhost_virtqueue *vq,
+                           int access, u64 addr, u64 len, int type)
 {
        const struct vhost_umem_node *node;
        struct vhost_umem *umem = vq->iotlb;
@@ -1220,7 +1220,7 @@ EXPORT_SYMBOL_GPL(vq_iotlb_prefetch);
 
 /* Can we log writes? */
 /* Caller should have device mutex but not vq mutex */
-int vhost_log_access_ok(struct vhost_dev *dev)
+bool vhost_log_access_ok(struct vhost_dev *dev)
 {
        return memory_access_ok(dev, dev->umem, 1);
 }
@@ -1228,8 +1228,8 @@ EXPORT_SYMBOL_GPL(vhost_log_access_ok);
 
 /* Verify access for write logging. */
 /* Caller should have vq mutex and device mutex */
-static int vq_log_access_ok(struct vhost_virtqueue *vq,
-                           void __user *log_base)
+static bool vq_log_access_ok(struct vhost_virtqueue *vq,
+                            void __user *log_base)
 {
        size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 
@@ -1242,12 +1242,14 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq,
 
 /* Can we start vq? */
 /* Caller should have vq mutex and device mutex */
-int vhost_vq_access_ok(struct vhost_virtqueue *vq)
+bool vhost_vq_access_ok(struct vhost_virtqueue *vq)
 {
-       int ret = vq_log_access_ok(vq, vq->log_base);
+       if (!vq_log_access_ok(vq, vq->log_base))
+               return false;
 
-       if (ret || vq->iotlb)
-               return ret;
+       /* Access validation occurs at prefetch time with IOTLB */
+       if (vq->iotlb)
+               return true;
 
        return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used);
 }
index d8ee85ae8fdccec270bc69733a1dfacd766cce34..6c844b90a1687317792bb7687a9955d7436bf572 100644 (file)
@@ -178,8 +178,8 @@ void vhost_dev_cleanup(struct vhost_dev *);
 void vhost_dev_stop(struct vhost_dev *);
 long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp);
 long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp);
-int vhost_vq_access_ok(struct vhost_virtqueue *vq);
-int vhost_log_access_ok(struct vhost_dev *);
+bool vhost_vq_access_ok(struct vhost_virtqueue *vq);
+bool vhost_log_access_ok(struct vhost_dev *);
 
 int vhost_get_vq_desc(struct vhost_virtqueue *,
                      struct iovec iov[], unsigned int iov_count,
index 4f950c68605533028b3ae110b9dc0d7ba4ace683..83d3d271ca1562316884b772a901dea242577de1 100644 (file)
@@ -9,9 +9,6 @@ if HAS_IOMEM
 config HAVE_FB_ATMEL
        bool
 
-config SH_LCD_MIPI_DSI
-       bool
-
 source "drivers/char/agp/Kconfig"
 
 source "drivers/gpu/vga/Kconfig"
index d1d3796773aa2d4c77716f3ba5e2fcebf0ae5653..08b822656846cca9072d40f40ea090635c75f1ab 100644 (file)
@@ -827,10 +827,8 @@ static struct sti_struct *sti_try_rom_generic(unsigned long address,
        }
        
        sti = kzalloc(sizeof(*sti), GFP_KERNEL);
-       if (!sti) {
-               printk(KERN_ERR "Not enough memory !\n");
+       if (!sti)
                return NULL;
-       }
 
        spin_lock_init(&sti->lock);
 
index 399573742487ce95408869102c61e4ff94a023fd..d94254263ea5caa77887ceef92b9671fd38a4761 100644 (file)
@@ -1053,6 +1053,11 @@ config FB_I810_I2C
        bool "Enable DDC Support"
        depends on FB_I810 && FB_I810_GTF
        select FB_DDC
+       help
+         Add DDC/I2C support for i810fb.  This will allow the driver to get
+         display information, especially for monitors with fickle timings.
+
+         If unsure, say Y.
 
 config FB_LE80578
        tristate "Intel LE80578 (Vermilion) support"
@@ -1917,8 +1922,7 @@ config FB_TMIO_ACCELL
 
 config FB_S3C
        tristate "Samsung S3C framebuffer support"
-       depends on FB && (CPU_S3C2416 || ARCH_S3C64XX || \
-               ARCH_S5PV210 || ARCH_EXYNOS)
+       depends on FB && (CPU_S3C2416 || ARCH_S3C64XX)
        select FB_CFB_FILLRECT
        select FB_CFB_COPYAREA
        select FB_CFB_IMAGEBLIT
index 36d25190b48c80991dcabc0c5480c897f60e34eb..38c1f324ce15d98d513e7dfd613c9a7e835f5979 100644 (file)
@@ -967,9 +967,8 @@ static int clcdfb_probe(struct amba_device *dev, const struct amba_id *id)
                goto out;
        }
 
-       fb = kzalloc(sizeof(struct clcd_fb), GFP_KERNEL);
+       fb = kzalloc(sizeof(*fb), GFP_KERNEL);
        if (!fb) {
-               printk(KERN_INFO "CLCD: could not allocate new clcd_fb struct\n");
                ret = -ENOMEM;
                goto free_region;
        }
index 3dee267d7c75024d8cce9086c5a7df86b35bd589..076d24afbd728bb3e8b4ffbc41f8a3be9642b51d 100644 (file)
 #include <linux/delay.h>
 #include <linux/backlight.h>
 #include <linux/gfp.h>
+#include <linux/gpio/consumer.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
 #include <video/of_display_timing.h>
 #include <linux/regulator/consumer.h>
 #include <video/videomode.h>
@@ -61,8 +61,7 @@ struct atmel_lcdfb_info {
 };
 
 struct atmel_lcdfb_power_ctrl_gpio {
-       int gpio;
-       int active_low;
+       struct gpio_desc *gpiod;
 
        struct list_head list;
 };
@@ -1018,7 +1017,7 @@ static void atmel_lcdfb_power_control_gpio(struct atmel_lcdfb_pdata *pdata, int
        struct atmel_lcdfb_power_ctrl_gpio *og;
 
        list_for_each_entry(og, &pdata->pwr_gpios, list)
-               gpio_set_value(og->gpio, on);
+               gpiod_set_value(og->gpiod, on);
 }
 
 static int atmel_lcdfb_of_init(struct atmel_lcdfb_info *sinfo)
@@ -1031,11 +1030,11 @@ static int atmel_lcdfb_of_init(struct atmel_lcdfb_info *sinfo)
        struct device_node *display_np;
        struct device_node *timings_np;
        struct display_timings *timings;
-       enum of_gpio_flags flags;
        struct atmel_lcdfb_power_ctrl_gpio *og;
        bool is_gpio_power = false;
+       struct gpio_desc *gpiod;
        int ret = -ENOENT;
-       int i, gpio;
+       int i;
 
        sinfo->config = (struct atmel_lcdfb_config*)
                of_match_device(atmel_lcdfb_dt_ids, dev)->data;
@@ -1072,28 +1071,22 @@ static int atmel_lcdfb_of_init(struct atmel_lcdfb_info *sinfo)
 
        INIT_LIST_HEAD(&pdata->pwr_gpios);
        ret = -ENOMEM;
-       for (i = 0; i < of_gpio_named_count(display_np, "atmel,power-control-gpio"); i++) {
-               gpio = of_get_named_gpio_flags(display_np, "atmel,power-control-gpio",
-                                              i, &flags);
-               if (gpio < 0)
+       for (i = 0; i < gpiod_count(dev, "atmel,power-control"); i++) {
+               gpiod = devm_gpiod_get_index(dev, "atmel,power-control",
+                                            i, GPIOD_ASIS);
+               if (IS_ERR(gpiod))
                        continue;
 
                og = devm_kzalloc(dev, sizeof(*og), GFP_KERNEL);
                if (!og)
                        goto put_display_node;
 
-               og->gpio = gpio;
-               og->active_low = flags & OF_GPIO_ACTIVE_LOW;
+               og->gpiod = gpiod;
                is_gpio_power = true;
-               ret = devm_gpio_request(dev, gpio, "lcd-power-control-gpio");
-               if (ret) {
-                       dev_err(dev, "request gpio %d failed\n", gpio);
-                       goto put_display_node;
-               }
 
-               ret = gpio_direction_output(gpio, og->active_low);
+               ret = gpiod_direction_output(gpiod, gpiod_is_active_low(gpiod));
                if (ret) {
-                       dev_err(dev, "set direction output gpio %d failed\n", gpio);
+                       dev_err(dev, "set direction output gpio atmel,power-control[%d] failed\n", i);
                        goto put_display_node;
                }
                list_add(&og->list, &pdata->pwr_gpios);
index db18474607c9c9c701353b8c347baf3e39c5ba63..09b0e558dce81d7eea45f4f38fd148e70341837f 100644 (file)
@@ -1716,7 +1716,7 @@ static int aty128fb_setup(char *options)
                        continue;
                }
                if(!strncmp(this_opt, "nomtrr", 6)) {
-                       mtrr = 0;
+                       mtrr = false;
                        continue;
                }
 #ifdef CONFIG_PPC_PMAC
index 7d3bd723d3d585f0d48db8cd6d980af3b6240471..74a62aa193c02b9000e587fc47f707f3340c2cda 100644 (file)
@@ -180,7 +180,7 @@ static int aty_dsp_gt(const struct fb_info *info, u32 bpp, struct pll_ct *pll)
                dsp_on = ((multiplier << vshift) + divider) / divider;
                tmp = ((ras_multiplier << xshift) + ras_divider) / ras_divider;
                if (dsp_on < tmp)
-               dsp_on = tmp;
+                       dsp_on = tmp;
                dsp_on = dsp_on + (tmp * 2) + (pll->xclkpagefaultdelay << xshift);
        }
 
index 87608c0b23510154815b36e522a95911f9141f0c..e8594bbaea609ff41c296132297e250df5a07ae4 100644 (file)
@@ -2255,6 +2255,23 @@ static const struct bin_attribute edid2_attr = {
        .read   = radeon_show_edid2,
 };
 
+static int radeon_kick_out_firmware_fb(struct pci_dev *pdev)
+{
+       struct apertures_struct *ap;
+
+       ap = alloc_apertures(1);
+       if (!ap)
+               return -ENOMEM;
+
+       ap->ranges[0].base = pci_resource_start(pdev, 0);
+       ap->ranges[0].size = pci_resource_len(pdev, 0);
+
+       remove_conflicting_framebuffers(ap, KBUILD_MODNAME, false);
+
+       kfree(ap);
+
+       return 0;
+}
 
 static int radeonfb_pci_register(struct pci_dev *pdev,
                                 const struct pci_device_id *ent)
@@ -2308,6 +2325,10 @@ static int radeonfb_pci_register(struct pci_dev *pdev,
        rinfo->fb_base_phys = pci_resource_start (pdev, 0);
        rinfo->mmio_base_phys = pci_resource_start (pdev, 2);
 
+       ret = radeon_kick_out_firmware_fb(pdev);
+       if (ret)
+               return ret;
+
        /* request the mem regions */
        ret = pci_request_region(pdev, 0, "radeonfb framebuffer");
        if (ret < 0) {
index 8de42f617d16792840515655b9457500ff5d4286..7c9a672e9811758f378a328790d5aeae3faf383b 100644 (file)
@@ -410,18 +410,15 @@ static int au1100fb_setup(struct au1100fb_device *fbdev)
 
 static int au1100fb_drv_probe(struct platform_device *dev)
 {
-       struct au1100fb_device *fbdev = NULL;
+       struct au1100fb_device *fbdev;
        struct resource *regs_res;
        unsigned long page;
        struct clk *c;
 
        /* Allocate new device private */
-       fbdev = devm_kzalloc(&dev->dev, sizeof(struct au1100fb_device),
-                            GFP_KERNEL);
-       if (!fbdev) {
-               print_err("fail to allocate device private record");
+       fbdev = devm_kzalloc(&dev->dev, sizeof(*fbdev), GFP_KERNEL);
+       if (!fbdev)
                return -ENOMEM;
-       }
 
        if (au1100fb_setup(fbdev))
                goto failed;
index 25abbcf389134201c47e2a729523d97dc344df9b..1bfd13cbd4e3e0ef40a23a5f03404c3e54e7f08a 100644 (file)
@@ -1960,12 +1960,8 @@ static int __init fsl_diu_init(void)
 
        of_node_put(np);
        coherence_data = vmalloc(coherence_data_size);
-       if (!coherence_data) {
-               pr_err("fsl-diu-fb: could not allocate coherence data "
-                      "(size=%zu)\n", coherence_data_size);
+       if (!coherence_data)
                return -ENOMEM;
-       }
-
 #endif
 
        ret = platform_driver_register(&fsl_diu_driver);
index 02796a4317a94c95e3d2ae10649f69e36490c7fd..f64e1d55d7a1194b728d65dfbee402334aede8ac 100644 (file)
@@ -696,10 +696,9 @@ static void* matroxfb_crtc2_probe(struct matrox_fb_info* minfo) {
        if (!minfo->devflags.crtc2)
                return NULL;
        m2info = kzalloc(sizeof(*m2info), GFP_KERNEL);
-       if (!m2info) {
-               printk(KERN_ERR "matroxfb_crtc2: Not enough memory for CRTC2 control structs\n");
+       if (!m2info)
                return NULL;
-       }
+
        m2info->primary_dev = minfo;
        if (matroxfb_dh_registerfb(m2info)) {
                kfree(m2info);
index 90d38de344797884fbf9d092e8095851dad21412..77c0a2f45b3b949819030e90f252f1905e554843 100644 (file)
@@ -280,6 +280,7 @@ static void offb_destroy(struct fb_info *info)
        if (info->screen_base)
                iounmap(info->screen_base);
        release_mem_region(info->apertures->ranges[0].base, info->apertures->ranges[0].size);
+       fb_dealloc_cmap(&info->cmap);
        framebuffer_release(info);
 }
 
@@ -518,6 +519,7 @@ static void __init offb_init_fb(const char *name,
        return;
 
 out_err:
+       fb_dealloc_cmap(&info->cmap);
        iounmap(info->screen_base);
 out_aper:
        iounmap(par->cmap_adr);
index 5f4f696c2ecfd5e677575675be2969b1ba09947d..9ec85ccd0ce96f1c0ae36552ebdaa6fd1764358d 100644 (file)
@@ -1383,11 +1383,9 @@ static int s3c_fb_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
-       sfb = devm_kzalloc(dev, sizeof(struct s3c_fb), GFP_KERNEL);
-       if (!sfb) {
-               dev_err(dev, "no memory for framebuffers\n");
+       sfb = devm_kzalloc(dev, sizeof(*sfb), GFP_KERNEL);
+       if (!sfb)
                return -ENOMEM;
-       }
 
        dev_dbg(dev, "allocate new framebuffer %p\n", sfb);
 
@@ -1716,63 +1714,6 @@ static struct s3c_fb_win_variant s3c_fb_data_64xx_wins[] = {
        },
 };
 
-static struct s3c_fb_win_variant s3c_fb_data_s5p_wins[] = {
-       [0] = {
-               .has_osd_c      = 1,
-               .osd_size_off   = 0x8,
-               .palette_sz     = 256,
-               .valid_bpp      = (VALID_BPP1248 | VALID_BPP(13) |
-                                  VALID_BPP(15) | VALID_BPP(16) |
-                                  VALID_BPP(18) | VALID_BPP(19) |
-                                  VALID_BPP(24) | VALID_BPP(25) |
-                                  VALID_BPP(32)),
-       },
-       [1] = {
-               .has_osd_c      = 1,
-               .has_osd_d      = 1,
-               .osd_size_off   = 0xc,
-               .has_osd_alpha  = 1,
-               .palette_sz     = 256,
-               .valid_bpp      = (VALID_BPP1248 | VALID_BPP(13) |
-                                  VALID_BPP(15) | VALID_BPP(16) |
-                                  VALID_BPP(18) | VALID_BPP(19) |
-                                  VALID_BPP(24) | VALID_BPP(25) |
-                                  VALID_BPP(32)),
-       },
-       [2] = {
-               .has_osd_c      = 1,
-               .has_osd_d      = 1,
-               .osd_size_off   = 0xc,
-               .has_osd_alpha  = 1,
-               .palette_sz     = 256,
-               .valid_bpp      = (VALID_BPP1248 | VALID_BPP(13) |
-                                  VALID_BPP(15) | VALID_BPP(16) |
-                                  VALID_BPP(18) | VALID_BPP(19) |
-                                  VALID_BPP(24) | VALID_BPP(25) |
-                                  VALID_BPP(32)),
-       },
-       [3] = {
-               .has_osd_c      = 1,
-               .has_osd_alpha  = 1,
-               .palette_sz     = 256,
-               .valid_bpp      = (VALID_BPP1248 | VALID_BPP(13) |
-                                  VALID_BPP(15) | VALID_BPP(16) |
-                                  VALID_BPP(18) | VALID_BPP(19) |
-                                  VALID_BPP(24) | VALID_BPP(25) |
-                                  VALID_BPP(32)),
-       },
-       [4] = {
-               .has_osd_c      = 1,
-               .has_osd_alpha  = 1,
-               .palette_sz     = 256,
-               .valid_bpp      = (VALID_BPP1248 | VALID_BPP(13) |
-                                  VALID_BPP(15) | VALID_BPP(16) |
-                                  VALID_BPP(18) | VALID_BPP(19) |
-                                  VALID_BPP(24) | VALID_BPP(25) |
-                                  VALID_BPP(32)),
-       },
-};
-
 static struct s3c_fb_driverdata s3c_fb_data_64xx = {
        .variant = {
                .nr_windows     = 5,
@@ -1804,102 +1745,6 @@ static struct s3c_fb_driverdata s3c_fb_data_64xx = {
        .win[4] = &s3c_fb_data_64xx_wins[4],
 };
 
-static struct s3c_fb_driverdata s3c_fb_data_s5pv210 = {
-       .variant = {
-               .nr_windows     = 5,
-               .vidtcon        = VIDTCON0,
-               .wincon         = WINCON(0),
-               .winmap         = WINxMAP(0),
-               .keycon         = WKEYCON,
-               .osd            = VIDOSD_BASE,
-               .osd_stride     = 16,
-               .buf_start      = VIDW_BUF_START(0),
-               .buf_size       = VIDW_BUF_SIZE(0),
-               .buf_end        = VIDW_BUF_END(0),
-
-               .palette = {
-                       [0] = 0x2400,
-                       [1] = 0x2800,
-                       [2] = 0x2c00,
-                       [3] = 0x3000,
-                       [4] = 0x3400,
-               },
-
-               .has_shadowcon  = 1,
-               .has_blendcon   = 1,
-               .has_clksel     = 1,
-               .has_fixvclk    = 1,
-       },
-       .win[0] = &s3c_fb_data_s5p_wins[0],
-       .win[1] = &s3c_fb_data_s5p_wins[1],
-       .win[2] = &s3c_fb_data_s5p_wins[2],
-       .win[3] = &s3c_fb_data_s5p_wins[3],
-       .win[4] = &s3c_fb_data_s5p_wins[4],
-};
-
-static struct s3c_fb_driverdata s3c_fb_data_exynos4 = {
-       .variant = {
-               .nr_windows     = 5,
-               .vidtcon        = VIDTCON0,
-               .wincon         = WINCON(0),
-               .winmap         = WINxMAP(0),
-               .keycon         = WKEYCON,
-               .osd            = VIDOSD_BASE,
-               .osd_stride     = 16,
-               .buf_start      = VIDW_BUF_START(0),
-               .buf_size       = VIDW_BUF_SIZE(0),
-               .buf_end        = VIDW_BUF_END(0),
-
-               .palette = {
-                       [0] = 0x2400,
-                       [1] = 0x2800,
-                       [2] = 0x2c00,
-                       [3] = 0x3000,
-                       [4] = 0x3400,
-               },
-
-               .has_shadowcon  = 1,
-               .has_blendcon   = 1,
-               .has_fixvclk    = 1,
-       },
-       .win[0] = &s3c_fb_data_s5p_wins[0],
-       .win[1] = &s3c_fb_data_s5p_wins[1],
-       .win[2] = &s3c_fb_data_s5p_wins[2],
-       .win[3] = &s3c_fb_data_s5p_wins[3],
-       .win[4] = &s3c_fb_data_s5p_wins[4],
-};
-
-static struct s3c_fb_driverdata s3c_fb_data_exynos5 = {
-       .variant = {
-               .nr_windows     = 5,
-               .vidtcon        = FIMD_V8_VIDTCON0,
-               .wincon         = WINCON(0),
-               .winmap         = WINxMAP(0),
-               .keycon         = WKEYCON,
-               .osd            = VIDOSD_BASE,
-               .osd_stride     = 16,
-               .buf_start      = VIDW_BUF_START(0),
-               .buf_size       = VIDW_BUF_SIZE(0),
-               .buf_end        = VIDW_BUF_END(0),
-
-               .palette = {
-                       [0] = 0x2400,
-                       [1] = 0x2800,
-                       [2] = 0x2c00,
-                       [3] = 0x3000,
-                       [4] = 0x3400,
-               },
-               .has_shadowcon  = 1,
-               .has_blendcon   = 1,
-               .has_fixvclk    = 1,
-       },
-       .win[0] = &s3c_fb_data_s5p_wins[0],
-       .win[1] = &s3c_fb_data_s5p_wins[1],
-       .win[2] = &s3c_fb_data_s5p_wins[2],
-       .win[3] = &s3c_fb_data_s5p_wins[3],
-       .win[4] = &s3c_fb_data_s5p_wins[4],
-};
-
 /* S3C2443/S3C2416 style hardware */
 static struct s3c_fb_driverdata s3c_fb_data_s3c2443 = {
        .variant = {
@@ -1941,15 +1786,6 @@ static const struct platform_device_id s3c_fb_driver_ids[] = {
        {
                .name           = "s3c-fb",
                .driver_data    = (unsigned long)&s3c_fb_data_64xx,
-       }, {
-               .name           = "s5pv210-fb",
-               .driver_data    = (unsigned long)&s3c_fb_data_s5pv210,
-       }, {
-               .name           = "exynos4-fb",
-               .driver_data    = (unsigned long)&s3c_fb_data_exynos4,
-       }, {
-               .name           = "exynos5-fb",
-               .driver_data    = (unsigned long)&s3c_fb_data_exynos5,
        }, {
                .name           = "s3c2443-fb",
                .driver_data    = (unsigned long)&s3c_fb_data_s3c2443,
index 85d6738b6c64ad2755629f6d71596da9a0f0bfd8..400b0e5681b24aeb3b0380c362aa7742e226a5c1 100644 (file)
@@ -1461,81 +1461,5 @@ static const struct SiS_LVDSCRT1Data SiS_LVDSCRT1640x480_1_H[] =
    0x00}}
 };
 
-bool           SiSInitPtr(struct SiS_Private *SiS_Pr);
-unsigned short SiS_GetModeID_LCD(int VGAEngine, unsigned int VBFlags, int HDisplay,
-                               int VDisplay, int Depth, bool FSTN,
-                               unsigned short CustomT, int LCDwith, int LCDheight,
-                               unsigned int VBFlags2);
-unsigned short SiS_GetModeID_TV(int VGAEngine, unsigned int VBFlags, int HDisplay,
-                               int VDisplay, int Depth, unsigned int VBFlags2);
-unsigned short SiS_GetModeID_VGA2(int VGAEngine, unsigned int VBFlags, int HDisplay,
-                               int VDisplay, int Depth, unsigned int VBFlags2);
-
-void           SiS_DisplayOn(struct SiS_Private *SiS_Pr);
-void           SiS_DisplayOff(struct SiS_Private *SiS_Pr);
-void           SiSRegInit(struct SiS_Private *SiS_Pr, SISIOADDRESS BaseAddr);
-void           SiS_SetEnableDstn(struct SiS_Private *SiS_Pr, int enable);
-void           SiS_SetEnableFstn(struct SiS_Private *SiS_Pr, int enable);
-unsigned short SiS_GetModeFlag(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-                               unsigned short ModeIdIndex);
-bool           SiSDetermineROMLayout661(struct SiS_Private *SiS_Pr);
-
-bool           SiS_SearchModeID(struct SiS_Private *SiS_Pr, unsigned short *ModeNo,
-                               unsigned short *ModeIdIndex);
-unsigned short SiS_GetModePtr(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-                               unsigned short ModeIdIndex);
-unsigned short  SiS_GetRefCRTVCLK(struct SiS_Private *SiS_Pr, unsigned short Index, int UseWide);
-unsigned short  SiS_GetRefCRT1CRTC(struct SiS_Private *SiS_Pr, unsigned short Index, int UseWide);
-unsigned short SiS_GetColorDepth(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-                               unsigned short ModeIdIndex);
-unsigned short SiS_GetOffset(struct SiS_Private *SiS_Pr,unsigned short ModeNo,
-                               unsigned short ModeIdIndex, unsigned short RRTI);
-#ifdef CONFIG_FB_SIS_300
-void           SiS_GetFIFOThresholdIndex300(struct SiS_Private *SiS_Pr, unsigned short *idx1,
-                               unsigned short *idx2);
-unsigned short SiS_GetFIFOThresholdB300(unsigned short idx1, unsigned short idx2);
-unsigned short SiS_GetLatencyFactor630(struct SiS_Private *SiS_Pr, unsigned short index);
-#endif
-void           SiS_LoadDAC(struct SiS_Private *SiS_Pr, unsigned short ModeNo, unsigned short ModeIdIndex);
-bool           SiSSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo);
-void           SiS_CalcCRRegisters(struct SiS_Private *SiS_Pr, int depth);
-void           SiS_CalcLCDACRT1Timing(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-                               unsigned short ModeIdIndex);
-void           SiS_Generic_ConvertCRData(struct SiS_Private *SiS_Pr, unsigned char *crdata, int xres,
-                               int yres, struct fb_var_screeninfo *var, bool writeres);
-
-/* From init301.c: */
-extern void            SiS_GetVBInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-                               unsigned short ModeIdIndex, int chkcrt2mode);
-extern void            SiS_GetLCDResInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-                               unsigned short ModeIdIndex);
-extern void            SiS_SetYPbPr(struct SiS_Private *SiS_Pr);
-extern void            SiS_SetTVMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-                               unsigned short ModeIdIndex);
-extern void            SiS_UnLockCRT2(struct SiS_Private *SiS_Pr);
-extern void            SiS_DisableBridge(struct SiS_Private *);
-extern bool            SiS_SetCRT2Group(struct SiS_Private *, unsigned short);
-extern unsigned short  SiS_GetRatePtr(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-                               unsigned short ModeIdIndex);
-extern void            SiS_WaitRetrace1(struct SiS_Private *SiS_Pr);
-extern unsigned short  SiS_GetResInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-                               unsigned short ModeIdIndex);
-extern unsigned short  SiS_GetCH700x(struct SiS_Private *SiS_Pr, unsigned short tempax);
-extern unsigned short  SiS_GetVCLK2Ptr(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-                               unsigned short ModeIdIndex, unsigned short RRTI);
-extern bool            SiS_IsVAMode(struct SiS_Private *);
-extern bool            SiS_IsDualEdge(struct SiS_Private *);
-
-#ifdef CONFIG_FB_SIS_300
-extern unsigned int    sisfb_read_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg);
-extern void            sisfb_write_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg,
-                               unsigned int val);
-#endif
-#ifdef CONFIG_FB_SIS_315
-extern void            sisfb_write_nbridge_pci_byte(struct SiS_Private *SiS_Pr, int reg,
-                               unsigned char val);
-extern unsigned int    sisfb_read_mio_pci_word(struct SiS_Private *SiS_Pr, int reg);
-#endif
-
 #endif
 
index 02ee752d5000567148cf24a174471b559cfc80b1..27a2b72e50e84b45e5ba1b292daf4bd386442143 100644 (file)
 #define SiS_I2CDELAY      1000
 #define SiS_I2CDELAYSHORT  150
 
+static const unsigned char SiS_YPbPrTable[3][64] = {
+  {
+    0x17,0x1d,0x03,0x09,0x05,0x06,0x0c,0x0c,
+    0x94,0x49,0x01,0x0a,0x06,0x0d,0x04,0x0a,
+    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x1b,
+    0x0c,0x50,0x00,0x97,0x00,0xda,0x4a,0x17,
+    0x7d,0x05,0x4b,0x00,0x00,0xe2,0x00,0x02,
+    0x03,0x0a,0x65,0x9d /*0x8d*/,0x08,0x92,0x8f,0x40,
+    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x53 /*0x50*/,
+    0x00,0x40,0x44,0x00,0xdb,0x02,0x3b,0x00
+  },
+  {
+    0x33,0x06,0x06,0x09,0x0b,0x0c,0x0c,0x0c,
+    0x98,0x0a,0x01,0x0d,0x06,0x0d,0x04,0x0a,
+    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x3f,
+    0x0c,0x50,0xb2,0x9f,0x16,0x59,0x4f,0x13,
+    0xad,0x11,0xad,0x1d,0x40,0x8a,0x3d,0xb8,
+    0x51,0x5e,0x60,0x49,0x7d,0x92,0x0f,0x40,
+    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x4e,
+    0x43,0x41,0x11,0x00,0xfc,0xff,0x32,0x00
+  },
+  {
+#if 0 /* OK, but sticks to left edge */
+    0x13,0x1d,0xe8,0x09,0x09,0xed,0x0c,0x0c,
+    0x98,0x0a,0x01,0x0c,0x06,0x0d,0x04,0x0a,
+    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x3f,
+    0xed,0x50,0x70,0x9f,0x16,0x59,0x21 /*0x2b*/,0x13,
+    0x27,0x0b,0x27,0xfc,0x30,0x27,0x1c,0xb0,
+    0x4b,0x4b,0x65 /*0x6f*/,0x2f,0x63,0x92,0x0f,0x40,
+    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x27,
+    0x00,0x40,0x11,0x00,0xfc,0xff,0x32,0x00
+#endif
+#if 1 /* Perfect */
+    0x23,0x2d,0xe8,0x09,0x09,0xed,0x0c,0x0c,
+    0x98,0x0a,0x01,0x0c,0x06,0x0d,0x04,0x0a,
+    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x3f,
+    0xed,0x50,0x70,0x9f,0x16,0x59,0x60,0x13,
+    0x27,0x0b,0x27,0xfc,0x30,0x27,0x1c,0xb0,
+    0x4b,0x4b,0x6f,0x2f,0x63,0x92,0x0f,0x40,
+    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x73,
+    0x00,0x40,0x11,0x00,0xfc,0xff,0x32,0x00
+#endif
+  }
+};
+
+static const unsigned char SiS_TVPhase[] =
+{
+       0x21,0xED,0xBA,0x08,    /* 0x00 SiS_NTSCPhase */
+       0x2A,0x05,0xE3,0x00,    /* 0x01 SiS_PALPhase */
+       0x21,0xE4,0x2E,0x9B,    /* 0x02 SiS_PALMPhase */
+       0x21,0xF4,0x3E,0xBA,    /* 0x03 SiS_PALNPhase */
+       0x1E,0x8B,0xA2,0xA7,
+       0x1E,0x83,0x0A,0xE0,    /* 0x05 SiS_SpecialPhaseM */
+       0x00,0x00,0x00,0x00,
+       0x00,0x00,0x00,0x00,
+       0x21,0xF0,0x7B,0xD6,    /* 0x08 SiS_NTSCPhase2 */
+       0x2A,0x09,0x86,0xE9,    /* 0x09 SiS_PALPhase2 */
+       0x21,0xE6,0xEF,0xA4,    /* 0x0a SiS_PALMPhase2 */
+       0x21,0xF6,0x94,0x46,    /* 0x0b SiS_PALNPhase2 */
+       0x1E,0x8B,0xA2,0xA7,
+       0x1E,0x83,0x0A,0xE0,    /* 0x0d SiS_SpecialPhaseM */
+       0x00,0x00,0x00,0x00,
+       0x00,0x00,0x00,0x00,
+       0x1e,0x8c,0x5c,0x7a,    /* 0x10 SiS_SpecialPhase */
+       0x25,0xd4,0xfd,0x5e     /* 0x11 SiS_SpecialPhaseJ */
+};
+
+static const unsigned char SiS_HiTVGroup3_1[] = {
+    0x00, 0x14, 0x15, 0x25, 0x55, 0x15, 0x0b, 0x13,
+    0xb1, 0x41, 0x62, 0x62, 0xff, 0xf4, 0x45, 0xa6,
+    0x25, 0x2f, 0x67, 0xf6, 0xbf, 0xff, 0x8e, 0x20,
+    0xac, 0xda, 0x60, 0xfe, 0x6a, 0x9a, 0x06, 0x10,
+    0xd1, 0x04, 0x18, 0x0a, 0xff, 0x80, 0x00, 0x80,
+    0x3b, 0x77, 0x00, 0xef, 0xe0, 0x10, 0xb0, 0xe0,
+    0x10, 0x4f, 0x0f, 0x0f, 0x05, 0x0f, 0x08, 0x6e,
+    0x1a, 0x1f, 0x25, 0x2a, 0x4c, 0xaa, 0x01
+};
+
+static const unsigned char SiS_HiTVGroup3_2[] = {
+    0x00, 0x14, 0x15, 0x25, 0x55, 0x15, 0x0b, 0x7a,
+    0x54, 0x41, 0xe7, 0xe7, 0xff, 0xf4, 0x45, 0xa6,
+    0x25, 0x2f, 0x67, 0xf6, 0xbf, 0xff, 0x8e, 0x20,
+    0xac, 0x6a, 0x60, 0x2b, 0x52, 0xcd, 0x61, 0x10,
+    0x51, 0x04, 0x18, 0x0a, 0x1f, 0x80, 0x00, 0x80,
+    0xff, 0xa4, 0x04, 0x2b, 0x94, 0x21, 0x72, 0x94,
+    0x26, 0x05, 0x01, 0x0f, 0xed, 0x0f, 0x0a, 0x64,
+    0x18, 0x1d, 0x23, 0x28, 0x4c, 0xaa, 0x01
+};
+
+/* 301C / 302ELV extended Part2 TV registers (4 tap scaler) */
+
+static const unsigned char SiS_Part2CLVX_1[] = {
+    0x00,0x00,
+    0x00,0x20,0x00,0x00,0x7F,0x20,0x02,0x7F,0x7D,0x20,0x04,0x7F,0x7D,0x1F,0x06,0x7E,
+    0x7C,0x1D,0x09,0x7E,0x7C,0x1B,0x0B,0x7E,0x7C,0x19,0x0E,0x7D,0x7C,0x17,0x11,0x7C,
+    0x7C,0x14,0x14,0x7C,0x7C,0x11,0x17,0x7C,0x7D,0x0E,0x19,0x7C,0x7E,0x0B,0x1B,0x7C,
+    0x7E,0x09,0x1D,0x7C,0x7F,0x06,0x1F,0x7C,0x7F,0x04,0x20,0x7D,0x00,0x02,0x20,0x7E
+};
+
+static const unsigned char SiS_Part2CLVX_2[] = {
+    0x00,0x00,
+    0x00,0x20,0x00,0x00,0x7F,0x20,0x02,0x7F,0x7D,0x20,0x04,0x7F,0x7D,0x1F,0x06,0x7E,
+    0x7C,0x1D,0x09,0x7E,0x7C,0x1B,0x0B,0x7E,0x7C,0x19,0x0E,0x7D,0x7C,0x17,0x11,0x7C,
+    0x7C,0x14,0x14,0x7C,0x7C,0x11,0x17,0x7C,0x7D,0x0E,0x19,0x7C,0x7E,0x0B,0x1B,0x7C,
+    0x7E,0x09,0x1D,0x7C,0x7F,0x06,0x1F,0x7C,0x7F,0x04,0x20,0x7D,0x00,0x02,0x20,0x7E
+};
+
+static const unsigned char SiS_Part2CLVX_3[] = {  /* NTSC, 525i, 525p */
+    0xE0,0x01,
+    0x04,0x1A,0x04,0x7E,0x03,0x1A,0x06,0x7D,0x01,0x1A,0x08,0x7D,0x00,0x19,0x0A,0x7D,
+    0x7F,0x19,0x0C,0x7C,0x7E,0x18,0x0E,0x7C,0x7E,0x17,0x10,0x7B,0x7D,0x15,0x12,0x7C,
+    0x7D,0x13,0x13,0x7D,0x7C,0x12,0x15,0x7D,0x7C,0x10,0x17,0x7D,0x7C,0x0E,0x18,0x7E,
+    0x7D,0x0C,0x19,0x7E,0x7D,0x0A,0x19,0x00,0x7D,0x08,0x1A,0x01,0x7E,0x06,0x1A,0x02,
+    0x58,0x02,
+    0x07,0x14,0x07,0x7E,0x06,0x14,0x09,0x7D,0x05,0x14,0x0A,0x7D,0x04,0x13,0x0B,0x7E,
+    0x03,0x13,0x0C,0x7E,0x02,0x12,0x0D,0x7F,0x01,0x12,0x0E,0x7F,0x01,0x11,0x0F,0x7F,
+    0x00,0x10,0x10,0x00,0x7F,0x0F,0x11,0x01,0x7F,0x0E,0x12,0x01,0x7E,0x0D,0x12,0x03,
+    0x7E,0x0C,0x13,0x03,0x7E,0x0B,0x13,0x04,0x7E,0x0A,0x14,0x04,0x7D,0x09,0x14,0x06,
+    0x00,0x03,
+    0x09,0x0F,0x09,0x7F,0x08,0x0F,0x09,0x00,0x07,0x0F,0x0A,0x00,0x06,0x0F,0x0A,0x01,
+    0x06,0x0E,0x0B,0x01,0x05,0x0E,0x0B,0x02,0x04,0x0E,0x0C,0x02,0x04,0x0D,0x0C,0x03,
+    0x03,0x0D,0x0D,0x03,0x02,0x0C,0x0D,0x05,0x02,0x0C,0x0E,0x04,0x01,0x0B,0x0E,0x06,
+    0x01,0x0B,0x0E,0x06,0x00,0x0A,0x0F,0x07,0x00,0x0A,0x0F,0x07,0x00,0x09,0x0F,0x08,
+    0xFF,0xFF
+};
+
+static const unsigned char SiS_Part2CLVX_4[] = {   /* PAL */
+    0x58,0x02,
+    0x05,0x19,0x05,0x7D,0x03,0x19,0x06,0x7E,0x02,0x19,0x08,0x7D,0x01,0x18,0x0A,0x7D,
+    0x00,0x18,0x0C,0x7C,0x7F,0x17,0x0E,0x7C,0x7E,0x16,0x0F,0x7D,0x7E,0x14,0x11,0x7D,
+    0x7D,0x13,0x13,0x7D,0x7D,0x11,0x14,0x7E,0x7D,0x0F,0x16,0x7E,0x7D,0x0E,0x17,0x7E,
+    0x7D,0x0C,0x18,0x7F,0x7D,0x0A,0x18,0x01,0x7D,0x08,0x19,0x02,0x7D,0x06,0x19,0x04,
+    0x00,0x03,
+    0x08,0x12,0x08,0x7E,0x07,0x12,0x09,0x7E,0x06,0x12,0x0A,0x7E,0x05,0x11,0x0B,0x7F,
+    0x04,0x11,0x0C,0x7F,0x03,0x11,0x0C,0x00,0x03,0x10,0x0D,0x00,0x02,0x0F,0x0E,0x01,
+    0x01,0x0F,0x0F,0x01,0x01,0x0E,0x0F,0x02,0x00,0x0D,0x10,0x03,0x7F,0x0C,0x11,0x04,
+    0x7F,0x0C,0x11,0x04,0x7F,0x0B,0x11,0x05,0x7E,0x0A,0x12,0x06,0x7E,0x09,0x12,0x07,
+    0x40,0x02,
+    0x04,0x1A,0x04,0x7E,0x02,0x1B,0x05,0x7E,0x01,0x1A,0x07,0x7E,0x00,0x1A,0x09,0x7D,
+    0x7F,0x19,0x0B,0x7D,0x7E,0x18,0x0D,0x7D,0x7D,0x17,0x10,0x7C,0x7D,0x15,0x12,0x7C,
+    0x7C,0x14,0x14,0x7C,0x7C,0x12,0x15,0x7D,0x7C,0x10,0x17,0x7D,0x7C,0x0D,0x18,0x7F,
+    0x7D,0x0B,0x19,0x7F,0x7D,0x09,0x1A,0x00,0x7D,0x07,0x1A,0x02,0x7E,0x05,0x1B,0x02,
+    0xFF,0xFF
+};
+
+static const unsigned char SiS_Part2CLVX_5[] = {   /* 750p */
+    0x00,0x03,
+    0x05,0x19,0x05,0x7D,0x03,0x19,0x06,0x7E,0x02,0x19,0x08,0x7D,0x01,0x18,0x0A,0x7D,
+    0x00,0x18,0x0C,0x7C,0x7F,0x17,0x0E,0x7C,0x7E,0x16,0x0F,0x7D,0x7E,0x14,0x11,0x7D,
+    0x7D,0x13,0x13,0x7D,0x7D,0x11,0x14,0x7E,0x7D,0x0F,0x16,0x7E,0x7D,0x0E,0x17,0x7E,
+    0x7D,0x0C,0x18,0x7F,0x7D,0x0A,0x18,0x01,0x7D,0x08,0x19,0x02,0x7D,0x06,0x19,0x04,
+    0xFF,0xFF
+};
+
+static const unsigned char SiS_Part2CLVX_6[] = {   /* 1080i */
+    0x00,0x04,
+    0x04,0x1A,0x04,0x7E,0x02,0x1B,0x05,0x7E,0x01,0x1A,0x07,0x7E,0x00,0x1A,0x09,0x7D,
+    0x7F,0x19,0x0B,0x7D,0x7E,0x18,0x0D,0x7D,0x7D,0x17,0x10,0x7C,0x7D,0x15,0x12,0x7C,
+    0x7C,0x14,0x14,0x7C,0x7C,0x12,0x15,0x7D,0x7C,0x10,0x17,0x7D,0x7C,0x0D,0x18,0x7F,
+    0x7D,0x0B,0x19,0x7F,0x7D,0x09,0x1A,0x00,0x7D,0x07,0x1A,0x02,0x7E,0x05,0x1B,0x02,
+    0xFF,0xFF,
+};
+
+#ifdef CONFIG_FB_SIS_315
+/* 661 et al LCD data structure (2.03.00) */
+static const unsigned char SiS_LCDStruct661[] = {
+    /* 1024x768 */
+/*  type|CR37|   HDE   |   VDE   |    HT   |    VT   |   hss    | hse   */
+    0x02,0xC0,0x00,0x04,0x00,0x03,0x40,0x05,0x26,0x03,0x10,0x00,0x88,
+    0x00,0x02,0x00,0x06,0x00,0x41,0x5A,0x64,0x00,0x00,0x00,0x00,0x04,
+    /*  | vss     |    vse  |clck|  clock  |CRT2DataP|CRT2DataP|idx     */
+    /*                                       VESA    non-VESA  noscale */
+    /* 1280x1024 */
+    0x03,0xC0,0x00,0x05,0x00,0x04,0x98,0x06,0x2A,0x04,0x30,0x00,0x70,
+    0x00,0x01,0x00,0x03,0x00,0x6C,0xF8,0x2F,0x00,0x00,0x00,0x00,0x08,
+    /* 1400x1050 */
+    0x09,0x20,0x78,0x05,0x1A,0x04,0x98,0x06,0x2A,0x04,0x18,0x00,0x38,
+    0x00,0x01,0x00,0x03,0x00,0x6C,0xF8,0x2F,0x00,0x00,0x00,0x00,0x09,
+    /* 1600x1200 */
+    0x0B,0xE0,0x40,0x06,0xB0,0x04,0x70,0x08,0xE2,0x04,0x40,0x00,0xC0,
+    0x00,0x01,0x00,0x03,0x00,0xA2,0x70,0x24,0x00,0x00,0x00,0x00,0x0A,
+    /* 1280x768 (_2) */
+    0x0A,0xE0,0x00,0x05,0x00,0x03,0x7C,0x06,0x26,0x03,0x30,0x00,0x70,
+    0x00,0x03,0x00,0x06,0x00,0x4D,0xC8,0x48,0x00,0x00,0x00,0x00,0x06,
+    /* 1280x720 */
+    0x0E,0xE0,0x00,0x05,0xD0,0x02,0x80,0x05,0x26,0x03,0x10,0x00,0x20,
+    0x00,0x01,0x00,0x06,0x00,0x45,0x9C,0x62,0x00,0x00,0x00,0x00,0x05,
+    /* 1280x800 (_2) */
+    0x0C,0xE0,0x00,0x05,0x20,0x03,0x10,0x06,0x2C,0x03,0x30,0x00,0x70,
+    0x00,0x04,0x00,0x03,0x00,0x49,0xCE,0x1E,0x00,0x00,0x00,0x00,0x09,
+    /* 1680x1050 */
+    0x0D,0xE0,0x90,0x06,0x1A,0x04,0x6C,0x07,0x2A,0x04,0x1A,0x00,0x4C,
+    0x00,0x03,0x00,0x06,0x00,0x79,0xBE,0x44,0x00,0x00,0x00,0x00,0x06,
+    /* 1280x800_3 */
+    0x0C,0xE0,0x00,0x05,0x20,0x03,0xAA,0x05,0x2E,0x03,0x30,0x00,0x50,
+    0x00,0x04,0x00,0x03,0x00,0x47,0xA9,0x10,0x00,0x00,0x00,0x00,0x07,
+    /* 800x600 */
+    0x01,0xC0,0x20,0x03,0x58,0x02,0x20,0x04,0x74,0x02,0x2A,0x00,0x80,
+    0x00,0x06,0x00,0x04,0x00,0x28,0x63,0x4B,0x00,0x00,0x00,0x00,0x00,
+    /* 1280x854 */
+    0x08,0xE0,0x00,0x05,0x56,0x03,0x80,0x06,0x5d,0x03,0x10,0x00,0x70,
+    0x00,0x01,0x00,0x03,0x00,0x54,0x75,0x13,0x00,0x00,0x00,0x00,0x08
+};
+#endif
+
+#ifdef CONFIG_FB_SIS_300
+static unsigned char SiS300_TrumpionData[14][80] = {
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
+    0x20,0x03,0x0B,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x10,0x00,0x00,0x04,0x23,
+    0x00,0x00,0x03,0x28,0x03,0x10,0x05,0x08,0x40,0x10,0x00,0x10,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xBC,0x01,0xFF,0x03,0xFF,0x19,0x01,0x00,0x05,0x09,0x04,0x04,0x05,
+    0x04,0x0C,0x09,0x05,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5A,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x27,0x00,0x80,0x02,
+    0x20,0x03,0x07,0x00,0x5E,0x01,0x0D,0x02,0x60,0x0C,0x30,0x11,0x00,0x00,0x04,0x23,
+    0x00,0x00,0x03,0x80,0x03,0x28,0x06,0x08,0x40,0x11,0x00,0x11,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0x90,0x01,0xFF,0x0F,0xF4,0x19,0x01,0x00,0x05,0x01,0x00,0x04,0x05,
+    0x04,0x0C,0x02,0x01,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEC,0x57,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x8A,0x00,0xD8,0x02,
+    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
+    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xD9,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
+    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x59,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x72,0x00,0xD8,0x02,
+    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
+    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xDA,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
+    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x02,0x00,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
+    0x20,0x03,0x16,0x00,0xE0,0x01,0x0D,0x02,0x60,0x0C,0x30,0x98,0x00,0x00,0x04,0x23,
+    0x00,0x01,0x03,0x45,0x03,0x48,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xF4,0x01,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x05,0x01,0x00,0x05,0x05,
+    0x04,0x0C,0x08,0x05,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5B,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x02,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0xBF,0x00,0x20,0x03,
+    0x20,0x04,0x0D,0x00,0x58,0x02,0x71,0x02,0x80,0x0C,0x30,0x9A,0x00,0xFA,0x03,0x1D,
+    0x00,0x01,0x03,0x22,0x03,0x28,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x1D,0x00,0x1D,
+    0x03,0x11,0x60,0x39,0x03,0x40,0x05,0xF4,0x18,0x07,0x02,0x06,0x04,0x01,0x06,0x0B,
+    0x02,0x0A,0x20,0x19,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5B,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0xEF,0x00,0x00,0x04,
+    0x40,0x05,0x13,0x00,0x00,0x03,0x26,0x03,0x88,0x0C,0x30,0x90,0x00,0x00,0x04,0x23,
+    0x00,0x01,0x03,0x24,0x03,0x28,0x06,0x08,0x40,0x90,0x00,0x90,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0x40,0x05,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x08,0x01,0x00,0x08,0x01,
+    0x00,0x08,0x01,0x01,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5B,0x01,0xBE,0x01,0x00 },
+  /* variant 2 */
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
+    0x20,0x03,0x15,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x18,0x00,0x00,0x04,0x23,
+    0x00,0x01,0x03,0x44,0x03,0x28,0x06,0x08,0x40,0x18,0x00,0x18,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xA6,0x01,0xFF,0x03,0xFF,0x19,0x01,0x00,0x05,0x13,0x04,0x04,0x05,
+    0x04,0x0C,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
+    0x20,0x03,0x15,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x18,0x00,0x00,0x04,0x23,
+    0x00,0x01,0x03,0x44,0x03,0x28,0x06,0x08,0x40,0x18,0x00,0x18,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xA6,0x01,0xFF,0x03,0xFF,0x19,0x01,0x00,0x05,0x13,0x04,0x04,0x05,
+    0x04,0x0C,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x8A,0x00,0xD8,0x02,
+    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
+    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xDA,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
+    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x72,0x00,0xD8,0x02,
+    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
+    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xDA,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
+    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x02,0x00,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
+    0x20,0x03,0x16,0x00,0xE0,0x01,0x0D,0x02,0x60,0x0C,0x30,0x98,0x00,0x00,0x04,0x23,
+    0x00,0x01,0x03,0x45,0x03,0x48,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xF4,0x01,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x05,0x01,0x00,0x05,0x05,
+    0x04,0x0C,0x08,0x05,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEA,0x58,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x02,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0xBF,0x00,0x20,0x03,
+    0x20,0x04,0x0D,0x00,0x58,0x02,0x71,0x02,0x80,0x0C,0x30,0x9A,0x00,0xFA,0x03,0x1D,
+    0x00,0x01,0x03,0x22,0x03,0x28,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x1D,0x00,0x1D,
+    0x03,0x11,0x60,0x39,0x03,0x40,0x05,0xF4,0x18,0x07,0x02,0x06,0x04,0x01,0x06,0x0B,
+    0x02,0x0A,0x20,0x19,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEA,0x58,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0xEF,0x00,0x00,0x04,
+    0x40,0x05,0x13,0x00,0x00,0x03,0x26,0x03,0x88,0x0C,0x30,0x90,0x00,0x00,0x04,0x23,
+    0x00,0x01,0x03,0x24,0x03,0x28,0x06,0x08,0x40,0x90,0x00,0x90,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0x40,0x05,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x08,0x01,0x00,0x08,0x01,
+    0x00,0x08,0x01,0x01,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEA,0x58,0x01,0xBE,0x01,0x00 }
+};
+#endif
+
+#ifdef CONFIG_FB_SIS_315
+static void    SiS_Chrontel701xOn(struct SiS_Private *SiS_Pr);
+static void    SiS_Chrontel701xOff(struct SiS_Private *SiS_Pr);
+static void    SiS_ChrontelInitTVVSync(struct SiS_Private *SiS_Pr);
+static void    SiS_ChrontelDoSomething1(struct SiS_Private *SiS_Pr);
+#endif /* 315 */
+
+#ifdef CONFIG_FB_SIS_300
+static  bool   SiS_SetTrumpionBlock(struct SiS_Private *SiS_Pr, unsigned char *dataptr);
+#endif
+
+static unsigned short  SiS_InitDDCRegs(struct SiS_Private *SiS_Pr, unsigned int VBFlags,
+                               int VGAEngine, unsigned short adaptnum, unsigned short DDCdatatype,
+                               bool checkcr32, unsigned int VBFlags2);
+static unsigned short  SiS_ProbeDDC(struct SiS_Private *SiS_Pr);
+static unsigned short  SiS_ReadDDC(struct SiS_Private *SiS_Pr, unsigned short DDCdatatype,
+                               unsigned char *buffer);
+static void            SiS_SetSwitchDDC2(struct SiS_Private *SiS_Pr);
+static unsigned short  SiS_SetStart(struct SiS_Private *SiS_Pr);
+static unsigned short  SiS_SetStop(struct SiS_Private *SiS_Pr);
+static unsigned short  SiS_SetSCLKLow(struct SiS_Private *SiS_Pr);
+static unsigned short  SiS_SetSCLKHigh(struct SiS_Private *SiS_Pr);
+static unsigned short  SiS_ReadDDC2Data(struct SiS_Private *SiS_Pr);
+static unsigned short  SiS_WriteDDC2Data(struct SiS_Private *SiS_Pr, unsigned short tempax);
+static unsigned short  SiS_CheckACK(struct SiS_Private *SiS_Pr);
+static unsigned short  SiS_WriteDABDDC(struct SiS_Private *SiS_Pr);
+static unsigned short  SiS_PrepareReadDDC(struct SiS_Private *SiS_Pr);
+static unsigned short  SiS_PrepareDDC(struct SiS_Private *SiS_Pr);
+static void            SiS_SendACK(struct SiS_Private *SiS_Pr, unsigned short yesno);
+static unsigned short  SiS_DoProbeDDC(struct SiS_Private *SiS_Pr);
+
+#ifdef CONFIG_FB_SIS_300
+static void            SiS_OEM300Setting(struct SiS_Private *SiS_Pr,
+                               unsigned short ModeNo, unsigned short ModeIdIndex, unsigned short RefTabindex);
+static void            SetOEMLCDData2(struct SiS_Private *SiS_Pr,
+                               unsigned short ModeNo, unsigned short ModeIdIndex,unsigned short RefTableIndex);
+#endif
+#ifdef CONFIG_FB_SIS_315
+static void            SiS_OEM310Setting(struct SiS_Private *SiS_Pr,
+                               unsigned short ModeNo,unsigned short ModeIdIndex, unsigned short RRTI);
+static void            SiS_OEM661Setting(struct SiS_Private *SiS_Pr,
+                               unsigned short ModeNo,unsigned short ModeIdIndex, unsigned short RRTI);
+static void            SiS_FinalizeLCD(struct SiS_Private *, unsigned short, unsigned short);
+#endif
+
 static unsigned short  SiS_GetBIOSLCDResInfo(struct SiS_Private *SiS_Pr);
 static void            SiS_SetCH70xx(struct SiS_Private *SiS_Pr, unsigned short reg, unsigned char val);
 
index 2112d6d7feda02fb101a3fddf4d99b2c53b23bbb..6e5cf14c4ce4eddc9b802503a9009a1572af6c20 100644 (file)
 #include "sis.h"
 #include <video/sisfb.h>
 
-static const unsigned char SiS_YPbPrTable[3][64] = {
-  {
-    0x17,0x1d,0x03,0x09,0x05,0x06,0x0c,0x0c,
-    0x94,0x49,0x01,0x0a,0x06,0x0d,0x04,0x0a,
-    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x1b,
-    0x0c,0x50,0x00,0x97,0x00,0xda,0x4a,0x17,
-    0x7d,0x05,0x4b,0x00,0x00,0xe2,0x00,0x02,
-    0x03,0x0a,0x65,0x9d /*0x8d*/,0x08,0x92,0x8f,0x40,
-    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x53 /*0x50*/,
-    0x00,0x40,0x44,0x00,0xdb,0x02,0x3b,0x00
-  },
-  {
-    0x33,0x06,0x06,0x09,0x0b,0x0c,0x0c,0x0c,
-    0x98,0x0a,0x01,0x0d,0x06,0x0d,0x04,0x0a,
-    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x3f,
-    0x0c,0x50,0xb2,0x9f,0x16,0x59,0x4f,0x13,
-    0xad,0x11,0xad,0x1d,0x40,0x8a,0x3d,0xb8,
-    0x51,0x5e,0x60,0x49,0x7d,0x92,0x0f,0x40,
-    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x4e,
-    0x43,0x41,0x11,0x00,0xfc,0xff,0x32,0x00
-  },
-  {
-#if 0 /* OK, but sticks to left edge */
-    0x13,0x1d,0xe8,0x09,0x09,0xed,0x0c,0x0c,
-    0x98,0x0a,0x01,0x0c,0x06,0x0d,0x04,0x0a,
-    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x3f,
-    0xed,0x50,0x70,0x9f,0x16,0x59,0x21 /*0x2b*/,0x13,
-    0x27,0x0b,0x27,0xfc,0x30,0x27,0x1c,0xb0,
-    0x4b,0x4b,0x65 /*0x6f*/,0x2f,0x63,0x92,0x0f,0x40,
-    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x27,
-    0x00,0x40,0x11,0x00,0xfc,0xff,0x32,0x00
-#endif
-#if 1 /* Perfect */
-    0x23,0x2d,0xe8,0x09,0x09,0xed,0x0c,0x0c,
-    0x98,0x0a,0x01,0x0c,0x06,0x0d,0x04,0x0a,
-    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x3f,
-    0xed,0x50,0x70,0x9f,0x16,0x59,0x60,0x13,
-    0x27,0x0b,0x27,0xfc,0x30,0x27,0x1c,0xb0,
-    0x4b,0x4b,0x6f,0x2f,0x63,0x92,0x0f,0x40,
-    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x73,
-    0x00,0x40,0x11,0x00,0xfc,0xff,0x32,0x00
-#endif
-  }
-};
-
-static const unsigned char SiS_TVPhase[] =
-{
-       0x21,0xED,0xBA,0x08,    /* 0x00 SiS_NTSCPhase */
-       0x2A,0x05,0xE3,0x00,    /* 0x01 SiS_PALPhase */
-       0x21,0xE4,0x2E,0x9B,    /* 0x02 SiS_PALMPhase */
-       0x21,0xF4,0x3E,0xBA,    /* 0x03 SiS_PALNPhase */
-       0x1E,0x8B,0xA2,0xA7,
-       0x1E,0x83,0x0A,0xE0,    /* 0x05 SiS_SpecialPhaseM */
-       0x00,0x00,0x00,0x00,
-       0x00,0x00,0x00,0x00,
-       0x21,0xF0,0x7B,0xD6,    /* 0x08 SiS_NTSCPhase2 */
-       0x2A,0x09,0x86,0xE9,    /* 0x09 SiS_PALPhase2 */
-       0x21,0xE6,0xEF,0xA4,    /* 0x0a SiS_PALMPhase2 */
-       0x21,0xF6,0x94,0x46,    /* 0x0b SiS_PALNPhase2 */
-       0x1E,0x8B,0xA2,0xA7,
-       0x1E,0x83,0x0A,0xE0,    /* 0x0d SiS_SpecialPhaseM */
-       0x00,0x00,0x00,0x00,
-       0x00,0x00,0x00,0x00,
-       0x1e,0x8c,0x5c,0x7a,    /* 0x10 SiS_SpecialPhase */
-       0x25,0xd4,0xfd,0x5e     /* 0x11 SiS_SpecialPhaseJ */
-};
-
-static const unsigned char SiS_HiTVGroup3_1[] = {
-    0x00, 0x14, 0x15, 0x25, 0x55, 0x15, 0x0b, 0x13,
-    0xb1, 0x41, 0x62, 0x62, 0xff, 0xf4, 0x45, 0xa6,
-    0x25, 0x2f, 0x67, 0xf6, 0xbf, 0xff, 0x8e, 0x20,
-    0xac, 0xda, 0x60, 0xfe, 0x6a, 0x9a, 0x06, 0x10,
-    0xd1, 0x04, 0x18, 0x0a, 0xff, 0x80, 0x00, 0x80,
-    0x3b, 0x77, 0x00, 0xef, 0xe0, 0x10, 0xb0, 0xe0,
-    0x10, 0x4f, 0x0f, 0x0f, 0x05, 0x0f, 0x08, 0x6e,
-    0x1a, 0x1f, 0x25, 0x2a, 0x4c, 0xaa, 0x01
-};
-
-static const unsigned char SiS_HiTVGroup3_2[] = {
-    0x00, 0x14, 0x15, 0x25, 0x55, 0x15, 0x0b, 0x7a,
-    0x54, 0x41, 0xe7, 0xe7, 0xff, 0xf4, 0x45, 0xa6,
-    0x25, 0x2f, 0x67, 0xf6, 0xbf, 0xff, 0x8e, 0x20,
-    0xac, 0x6a, 0x60, 0x2b, 0x52, 0xcd, 0x61, 0x10,
-    0x51, 0x04, 0x18, 0x0a, 0x1f, 0x80, 0x00, 0x80,
-    0xff, 0xa4, 0x04, 0x2b, 0x94, 0x21, 0x72, 0x94,
-    0x26, 0x05, 0x01, 0x0f, 0xed, 0x0f, 0x0a, 0x64,
-    0x18, 0x1d, 0x23, 0x28, 0x4c, 0xaa, 0x01
-};
-
-/* 301C / 302ELV extended Part2 TV registers (4 tap scaler) */
-
-static const unsigned char SiS_Part2CLVX_1[] = {
-    0x00,0x00,
-    0x00,0x20,0x00,0x00,0x7F,0x20,0x02,0x7F,0x7D,0x20,0x04,0x7F,0x7D,0x1F,0x06,0x7E,
-    0x7C,0x1D,0x09,0x7E,0x7C,0x1B,0x0B,0x7E,0x7C,0x19,0x0E,0x7D,0x7C,0x17,0x11,0x7C,
-    0x7C,0x14,0x14,0x7C,0x7C,0x11,0x17,0x7C,0x7D,0x0E,0x19,0x7C,0x7E,0x0B,0x1B,0x7C,
-    0x7E,0x09,0x1D,0x7C,0x7F,0x06,0x1F,0x7C,0x7F,0x04,0x20,0x7D,0x00,0x02,0x20,0x7E
-};
-
-static const unsigned char SiS_Part2CLVX_2[] = {
-    0x00,0x00,
-    0x00,0x20,0x00,0x00,0x7F,0x20,0x02,0x7F,0x7D,0x20,0x04,0x7F,0x7D,0x1F,0x06,0x7E,
-    0x7C,0x1D,0x09,0x7E,0x7C,0x1B,0x0B,0x7E,0x7C,0x19,0x0E,0x7D,0x7C,0x17,0x11,0x7C,
-    0x7C,0x14,0x14,0x7C,0x7C,0x11,0x17,0x7C,0x7D,0x0E,0x19,0x7C,0x7E,0x0B,0x1B,0x7C,
-    0x7E,0x09,0x1D,0x7C,0x7F,0x06,0x1F,0x7C,0x7F,0x04,0x20,0x7D,0x00,0x02,0x20,0x7E
-};
-
-static const unsigned char SiS_Part2CLVX_3[] = {  /* NTSC, 525i, 525p */
-    0xE0,0x01,
-    0x04,0x1A,0x04,0x7E,0x03,0x1A,0x06,0x7D,0x01,0x1A,0x08,0x7D,0x00,0x19,0x0A,0x7D,
-    0x7F,0x19,0x0C,0x7C,0x7E,0x18,0x0E,0x7C,0x7E,0x17,0x10,0x7B,0x7D,0x15,0x12,0x7C,
-    0x7D,0x13,0x13,0x7D,0x7C,0x12,0x15,0x7D,0x7C,0x10,0x17,0x7D,0x7C,0x0E,0x18,0x7E,
-    0x7D,0x0C,0x19,0x7E,0x7D,0x0A,0x19,0x00,0x7D,0x08,0x1A,0x01,0x7E,0x06,0x1A,0x02,
-    0x58,0x02,
-    0x07,0x14,0x07,0x7E,0x06,0x14,0x09,0x7D,0x05,0x14,0x0A,0x7D,0x04,0x13,0x0B,0x7E,
-    0x03,0x13,0x0C,0x7E,0x02,0x12,0x0D,0x7F,0x01,0x12,0x0E,0x7F,0x01,0x11,0x0F,0x7F,
-    0x00,0x10,0x10,0x00,0x7F,0x0F,0x11,0x01,0x7F,0x0E,0x12,0x01,0x7E,0x0D,0x12,0x03,
-    0x7E,0x0C,0x13,0x03,0x7E,0x0B,0x13,0x04,0x7E,0x0A,0x14,0x04,0x7D,0x09,0x14,0x06,
-    0x00,0x03,
-    0x09,0x0F,0x09,0x7F,0x08,0x0F,0x09,0x00,0x07,0x0F,0x0A,0x00,0x06,0x0F,0x0A,0x01,
-    0x06,0x0E,0x0B,0x01,0x05,0x0E,0x0B,0x02,0x04,0x0E,0x0C,0x02,0x04,0x0D,0x0C,0x03,
-    0x03,0x0D,0x0D,0x03,0x02,0x0C,0x0D,0x05,0x02,0x0C,0x0E,0x04,0x01,0x0B,0x0E,0x06,
-    0x01,0x0B,0x0E,0x06,0x00,0x0A,0x0F,0x07,0x00,0x0A,0x0F,0x07,0x00,0x09,0x0F,0x08,
-    0xFF,0xFF
-};
-
-static const unsigned char SiS_Part2CLVX_4[] = {   /* PAL */
-    0x58,0x02,
-    0x05,0x19,0x05,0x7D,0x03,0x19,0x06,0x7E,0x02,0x19,0x08,0x7D,0x01,0x18,0x0A,0x7D,
-    0x00,0x18,0x0C,0x7C,0x7F,0x17,0x0E,0x7C,0x7E,0x16,0x0F,0x7D,0x7E,0x14,0x11,0x7D,
-    0x7D,0x13,0x13,0x7D,0x7D,0x11,0x14,0x7E,0x7D,0x0F,0x16,0x7E,0x7D,0x0E,0x17,0x7E,
-    0x7D,0x0C,0x18,0x7F,0x7D,0x0A,0x18,0x01,0x7D,0x08,0x19,0x02,0x7D,0x06,0x19,0x04,
-    0x00,0x03,
-    0x08,0x12,0x08,0x7E,0x07,0x12,0x09,0x7E,0x06,0x12,0x0A,0x7E,0x05,0x11,0x0B,0x7F,
-    0x04,0x11,0x0C,0x7F,0x03,0x11,0x0C,0x00,0x03,0x10,0x0D,0x00,0x02,0x0F,0x0E,0x01,
-    0x01,0x0F,0x0F,0x01,0x01,0x0E,0x0F,0x02,0x00,0x0D,0x10,0x03,0x7F,0x0C,0x11,0x04,
-    0x7F,0x0C,0x11,0x04,0x7F,0x0B,0x11,0x05,0x7E,0x0A,0x12,0x06,0x7E,0x09,0x12,0x07,
-    0x40,0x02,
-    0x04,0x1A,0x04,0x7E,0x02,0x1B,0x05,0x7E,0x01,0x1A,0x07,0x7E,0x00,0x1A,0x09,0x7D,
-    0x7F,0x19,0x0B,0x7D,0x7E,0x18,0x0D,0x7D,0x7D,0x17,0x10,0x7C,0x7D,0x15,0x12,0x7C,
-    0x7C,0x14,0x14,0x7C,0x7C,0x12,0x15,0x7D,0x7C,0x10,0x17,0x7D,0x7C,0x0D,0x18,0x7F,
-    0x7D,0x0B,0x19,0x7F,0x7D,0x09,0x1A,0x00,0x7D,0x07,0x1A,0x02,0x7E,0x05,0x1B,0x02,
-    0xFF,0xFF
-};
-
-static const unsigned char SiS_Part2CLVX_5[] = {   /* 750p */
-    0x00,0x03,
-    0x05,0x19,0x05,0x7D,0x03,0x19,0x06,0x7E,0x02,0x19,0x08,0x7D,0x01,0x18,0x0A,0x7D,
-    0x00,0x18,0x0C,0x7C,0x7F,0x17,0x0E,0x7C,0x7E,0x16,0x0F,0x7D,0x7E,0x14,0x11,0x7D,
-    0x7D,0x13,0x13,0x7D,0x7D,0x11,0x14,0x7E,0x7D,0x0F,0x16,0x7E,0x7D,0x0E,0x17,0x7E,
-    0x7D,0x0C,0x18,0x7F,0x7D,0x0A,0x18,0x01,0x7D,0x08,0x19,0x02,0x7D,0x06,0x19,0x04,
-    0xFF,0xFF
-};
-
-static const unsigned char SiS_Part2CLVX_6[] = {   /* 1080i */
-    0x00,0x04,
-    0x04,0x1A,0x04,0x7E,0x02,0x1B,0x05,0x7E,0x01,0x1A,0x07,0x7E,0x00,0x1A,0x09,0x7D,
-    0x7F,0x19,0x0B,0x7D,0x7E,0x18,0x0D,0x7D,0x7D,0x17,0x10,0x7C,0x7D,0x15,0x12,0x7C,
-    0x7C,0x14,0x14,0x7C,0x7C,0x12,0x15,0x7D,0x7C,0x10,0x17,0x7D,0x7C,0x0D,0x18,0x7F,
-    0x7D,0x0B,0x19,0x7F,0x7D,0x09,0x1A,0x00,0x7D,0x07,0x1A,0x02,0x7E,0x05,0x1B,0x02,
-    0xFF,0xFF,
-};
-
-#ifdef CONFIG_FB_SIS_315
-/* 661 et al LCD data structure (2.03.00) */
-static const unsigned char SiS_LCDStruct661[] = {
-    /* 1024x768 */
-/*  type|CR37|   HDE   |   VDE   |    HT   |    VT   |   hss    | hse   */
-    0x02,0xC0,0x00,0x04,0x00,0x03,0x40,0x05,0x26,0x03,0x10,0x00,0x88,
-    0x00,0x02,0x00,0x06,0x00,0x41,0x5A,0x64,0x00,0x00,0x00,0x00,0x04,
-    /*  | vss     |    vse  |clck|  clock  |CRT2DataP|CRT2DataP|idx     */
-    /*                                       VESA    non-VESA  noscale */
-    /* 1280x1024 */
-    0x03,0xC0,0x00,0x05,0x00,0x04,0x98,0x06,0x2A,0x04,0x30,0x00,0x70,
-    0x00,0x01,0x00,0x03,0x00,0x6C,0xF8,0x2F,0x00,0x00,0x00,0x00,0x08,
-    /* 1400x1050 */
-    0x09,0x20,0x78,0x05,0x1A,0x04,0x98,0x06,0x2A,0x04,0x18,0x00,0x38,
-    0x00,0x01,0x00,0x03,0x00,0x6C,0xF8,0x2F,0x00,0x00,0x00,0x00,0x09,
-    /* 1600x1200 */
-    0x0B,0xE0,0x40,0x06,0xB0,0x04,0x70,0x08,0xE2,0x04,0x40,0x00,0xC0,
-    0x00,0x01,0x00,0x03,0x00,0xA2,0x70,0x24,0x00,0x00,0x00,0x00,0x0A,
-    /* 1280x768 (_2) */
-    0x0A,0xE0,0x00,0x05,0x00,0x03,0x7C,0x06,0x26,0x03,0x30,0x00,0x70,
-    0x00,0x03,0x00,0x06,0x00,0x4D,0xC8,0x48,0x00,0x00,0x00,0x00,0x06,
-    /* 1280x720 */
-    0x0E,0xE0,0x00,0x05,0xD0,0x02,0x80,0x05,0x26,0x03,0x10,0x00,0x20,
-    0x00,0x01,0x00,0x06,0x00,0x45,0x9C,0x62,0x00,0x00,0x00,0x00,0x05,
-    /* 1280x800 (_2) */
-    0x0C,0xE0,0x00,0x05,0x20,0x03,0x10,0x06,0x2C,0x03,0x30,0x00,0x70,
-    0x00,0x04,0x00,0x03,0x00,0x49,0xCE,0x1E,0x00,0x00,0x00,0x00,0x09,
-    /* 1680x1050 */
-    0x0D,0xE0,0x90,0x06,0x1A,0x04,0x6C,0x07,0x2A,0x04,0x1A,0x00,0x4C,
-    0x00,0x03,0x00,0x06,0x00,0x79,0xBE,0x44,0x00,0x00,0x00,0x00,0x06,
-    /* 1280x800_3 */
-    0x0C,0xE0,0x00,0x05,0x20,0x03,0xAA,0x05,0x2E,0x03,0x30,0x00,0x50,
-    0x00,0x04,0x00,0x03,0x00,0x47,0xA9,0x10,0x00,0x00,0x00,0x00,0x07,
-    /* 800x600 */
-    0x01,0xC0,0x20,0x03,0x58,0x02,0x20,0x04,0x74,0x02,0x2A,0x00,0x80,
-    0x00,0x06,0x00,0x04,0x00,0x28,0x63,0x4B,0x00,0x00,0x00,0x00,0x00,
-    /* 1280x854 */
-    0x08,0xE0,0x00,0x05,0x56,0x03,0x80,0x06,0x5d,0x03,0x10,0x00,0x70,
-    0x00,0x01,0x00,0x03,0x00,0x54,0x75,0x13,0x00,0x00,0x00,0x00,0x08
-};
-#endif
-
-#ifdef CONFIG_FB_SIS_300
-static unsigned char SiS300_TrumpionData[14][80] = {
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
-    0x20,0x03,0x0B,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x10,0x00,0x00,0x04,0x23,
-    0x00,0x00,0x03,0x28,0x03,0x10,0x05,0x08,0x40,0x10,0x00,0x10,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xBC,0x01,0xFF,0x03,0xFF,0x19,0x01,0x00,0x05,0x09,0x04,0x04,0x05,
-    0x04,0x0C,0x09,0x05,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5A,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x27,0x00,0x80,0x02,
-    0x20,0x03,0x07,0x00,0x5E,0x01,0x0D,0x02,0x60,0x0C,0x30,0x11,0x00,0x00,0x04,0x23,
-    0x00,0x00,0x03,0x80,0x03,0x28,0x06,0x08,0x40,0x11,0x00,0x11,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0x90,0x01,0xFF,0x0F,0xF4,0x19,0x01,0x00,0x05,0x01,0x00,0x04,0x05,
-    0x04,0x0C,0x02,0x01,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEC,0x57,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x8A,0x00,0xD8,0x02,
-    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
-    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xD9,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
-    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x59,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x72,0x00,0xD8,0x02,
-    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
-    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xDA,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
-    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x02,0x00,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
-    0x20,0x03,0x16,0x00,0xE0,0x01,0x0D,0x02,0x60,0x0C,0x30,0x98,0x00,0x00,0x04,0x23,
-    0x00,0x01,0x03,0x45,0x03,0x48,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xF4,0x01,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x05,0x01,0x00,0x05,0x05,
-    0x04,0x0C,0x08,0x05,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5B,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x02,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0xBF,0x00,0x20,0x03,
-    0x20,0x04,0x0D,0x00,0x58,0x02,0x71,0x02,0x80,0x0C,0x30,0x9A,0x00,0xFA,0x03,0x1D,
-    0x00,0x01,0x03,0x22,0x03,0x28,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x1D,0x00,0x1D,
-    0x03,0x11,0x60,0x39,0x03,0x40,0x05,0xF4,0x18,0x07,0x02,0x06,0x04,0x01,0x06,0x0B,
-    0x02,0x0A,0x20,0x19,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5B,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0xEF,0x00,0x00,0x04,
-    0x40,0x05,0x13,0x00,0x00,0x03,0x26,0x03,0x88,0x0C,0x30,0x90,0x00,0x00,0x04,0x23,
-    0x00,0x01,0x03,0x24,0x03,0x28,0x06,0x08,0x40,0x90,0x00,0x90,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0x40,0x05,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x08,0x01,0x00,0x08,0x01,
-    0x00,0x08,0x01,0x01,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5B,0x01,0xBE,0x01,0x00 },
-  /* variant 2 */
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
-    0x20,0x03,0x15,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x18,0x00,0x00,0x04,0x23,
-    0x00,0x01,0x03,0x44,0x03,0x28,0x06,0x08,0x40,0x18,0x00,0x18,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xA6,0x01,0xFF,0x03,0xFF,0x19,0x01,0x00,0x05,0x13,0x04,0x04,0x05,
-    0x04,0x0C,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
-    0x20,0x03,0x15,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x18,0x00,0x00,0x04,0x23,
-    0x00,0x01,0x03,0x44,0x03,0x28,0x06,0x08,0x40,0x18,0x00,0x18,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xA6,0x01,0xFF,0x03,0xFF,0x19,0x01,0x00,0x05,0x13,0x04,0x04,0x05,
-    0x04,0x0C,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x8A,0x00,0xD8,0x02,
-    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
-    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xDA,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
-    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x72,0x00,0xD8,0x02,
-    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
-    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xDA,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
-    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x02,0x00,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
-    0x20,0x03,0x16,0x00,0xE0,0x01,0x0D,0x02,0x60,0x0C,0x30,0x98,0x00,0x00,0x04,0x23,
-    0x00,0x01,0x03,0x45,0x03,0x48,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xF4,0x01,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x05,0x01,0x00,0x05,0x05,
-    0x04,0x0C,0x08,0x05,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEA,0x58,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x02,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0xBF,0x00,0x20,0x03,
-    0x20,0x04,0x0D,0x00,0x58,0x02,0x71,0x02,0x80,0x0C,0x30,0x9A,0x00,0xFA,0x03,0x1D,
-    0x00,0x01,0x03,0x22,0x03,0x28,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x1D,0x00,0x1D,
-    0x03,0x11,0x60,0x39,0x03,0x40,0x05,0xF4,0x18,0x07,0x02,0x06,0x04,0x01,0x06,0x0B,
-    0x02,0x0A,0x20,0x19,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEA,0x58,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0xEF,0x00,0x00,0x04,
-    0x40,0x05,0x13,0x00,0x00,0x03,0x26,0x03,0x88,0x0C,0x30,0x90,0x00,0x00,0x04,0x23,
-    0x00,0x01,0x03,0x24,0x03,0x28,0x06,0x08,0x40,0x90,0x00,0x90,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0x40,0x05,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x08,0x01,0x00,0x08,0x01,
-    0x00,0x08,0x01,0x01,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEA,0x58,0x01,0xBE,0x01,0x00 }
-};
-#endif
-
 void           SiS_UnLockCRT2(struct SiS_Private *SiS_Pr);
 void           SiS_EnableCRT2(struct SiS_Private *SiS_Pr);
 unsigned short SiS_GetRatePtr(struct SiS_Private *SiS_Pr, unsigned short ModeNo, unsigned short ModeIdIndex);
@@ -375,16 +94,11 @@ unsigned short     SiS_GetCH701x(struct SiS_Private *SiS_Pr, unsigned short tempax);
 void           SiS_SetCH70xxANDOR(struct SiS_Private *SiS_Pr, unsigned short reg,
                        unsigned char orval,unsigned short andval);
 #ifdef CONFIG_FB_SIS_315
-static void    SiS_Chrontel701xOn(struct SiS_Private *SiS_Pr);
-static void    SiS_Chrontel701xOff(struct SiS_Private *SiS_Pr);
-static void    SiS_ChrontelInitTVVSync(struct SiS_Private *SiS_Pr);
-static void    SiS_ChrontelDoSomething1(struct SiS_Private *SiS_Pr);
 void           SiS_Chrontel701xBLOn(struct SiS_Private *SiS_Pr);
 void           SiS_Chrontel701xBLOff(struct SiS_Private *SiS_Pr);
 #endif /* 315 */
 
 #ifdef CONFIG_FB_SIS_300
-static  bool   SiS_SetTrumpionBlock(struct SiS_Private *SiS_Pr, unsigned char *dataptr);
 void           SiS_SetChrontelGPIO(struct SiS_Private *SiS_Pr, unsigned short myvbinfo);
 #endif
 
@@ -394,40 +108,6 @@ unsigned short     SiS_HandleDDC(struct SiS_Private *SiS_Pr, unsigned int VBFlags, i
                        unsigned short adaptnum, unsigned short DDCdatatype,
                        unsigned char *buffer, unsigned int VBFlags2);
 
-static unsigned short  SiS_InitDDCRegs(struct SiS_Private *SiS_Pr, unsigned int VBFlags,
-                               int VGAEngine, unsigned short adaptnum, unsigned short DDCdatatype,
-                               bool checkcr32, unsigned int VBFlags2);
-static unsigned short  SiS_ProbeDDC(struct SiS_Private *SiS_Pr);
-static unsigned short  SiS_ReadDDC(struct SiS_Private *SiS_Pr, unsigned short DDCdatatype,
-                               unsigned char *buffer);
-static void            SiS_SetSwitchDDC2(struct SiS_Private *SiS_Pr);
-static unsigned short  SiS_SetStart(struct SiS_Private *SiS_Pr);
-static unsigned short  SiS_SetStop(struct SiS_Private *SiS_Pr);
-static unsigned short  SiS_SetSCLKLow(struct SiS_Private *SiS_Pr);
-static unsigned short  SiS_SetSCLKHigh(struct SiS_Private *SiS_Pr);
-static unsigned short  SiS_ReadDDC2Data(struct SiS_Private *SiS_Pr);
-static unsigned short  SiS_WriteDDC2Data(struct SiS_Private *SiS_Pr, unsigned short tempax);
-static unsigned short  SiS_CheckACK(struct SiS_Private *SiS_Pr);
-static unsigned short  SiS_WriteDABDDC(struct SiS_Private *SiS_Pr);
-static unsigned short  SiS_PrepareReadDDC(struct SiS_Private *SiS_Pr);
-static unsigned short  SiS_PrepareDDC(struct SiS_Private *SiS_Pr);
-static void            SiS_SendACK(struct SiS_Private *SiS_Pr, unsigned short yesno);
-static unsigned short  SiS_DoProbeDDC(struct SiS_Private *SiS_Pr);
-
-#ifdef CONFIG_FB_SIS_300
-static void            SiS_OEM300Setting(struct SiS_Private *SiS_Pr,
-                               unsigned short ModeNo, unsigned short ModeIdIndex, unsigned short RefTabindex);
-static void            SetOEMLCDData2(struct SiS_Private *SiS_Pr,
-                               unsigned short ModeNo, unsigned short ModeIdIndex,unsigned short RefTableIndex);
-#endif
-#ifdef CONFIG_FB_SIS_315
-static void            SiS_OEM310Setting(struct SiS_Private *SiS_Pr,
-                               unsigned short ModeNo,unsigned short ModeIdIndex, unsigned short RRTI);
-static void            SiS_OEM661Setting(struct SiS_Private *SiS_Pr,
-                               unsigned short ModeNo,unsigned short ModeIdIndex, unsigned short RRTI);
-static void            SiS_FinalizeLCD(struct SiS_Private *, unsigned short, unsigned short);
-#endif
-
 extern void            SiS_DisplayOff(struct SiS_Private *SiS_Pr);
 extern void            SiS_DisplayOn(struct SiS_Private *SiS_Pr);
 extern bool            SiS_SearchModeID(struct SiS_Private *, unsigned short *, unsigned short *);
index ea1d1c9640bfbf187e5b7c1fd97300b70962846f..d04982b0cd6f7685b5215c1374cd89ad3fe87cb6 100644 (file)
@@ -28,6 +28,7 @@
 
 #include "vgatypes.h"
 #include "vstruct.h"
+#include "init.h"
 
 #define VER_MAJOR              1
 #define VER_MINOR              8
@@ -321,6 +322,85 @@ u8 SiS_GetRegByte(SISIOADDRESS);
 u16 SiS_GetRegShort(SISIOADDRESS);
 u32 SiS_GetRegLong(SISIOADDRESS);
 
+/* Chrontel TV, DDC and DPMS functions */
+/* from init.c */
+bool           SiSInitPtr(struct SiS_Private *SiS_Pr);
+unsigned short SiS_GetModeID_LCD(int VGAEngine, unsigned int VBFlags, int HDisplay,
+                               int VDisplay, int Depth, bool FSTN,
+                               unsigned short CustomT, int LCDwith, int LCDheight,
+                               unsigned int VBFlags2);
+unsigned short SiS_GetModeID_TV(int VGAEngine, unsigned int VBFlags, int HDisplay,
+                               int VDisplay, int Depth, unsigned int VBFlags2);
+unsigned short SiS_GetModeID_VGA2(int VGAEngine, unsigned int VBFlags, int HDisplay,
+                               int VDisplay, int Depth, unsigned int VBFlags2);
+
+void           SiS_DisplayOn(struct SiS_Private *SiS_Pr);
+void           SiS_DisplayOff(struct SiS_Private *SiS_Pr);
+void           SiSRegInit(struct SiS_Private *SiS_Pr, SISIOADDRESS BaseAddr);
+void           SiS_SetEnableDstn(struct SiS_Private *SiS_Pr, int enable);
+void           SiS_SetEnableFstn(struct SiS_Private *SiS_Pr, int enable);
+unsigned short SiS_GetModeFlag(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+                               unsigned short ModeIdIndex);
+bool           SiSDetermineROMLayout661(struct SiS_Private *SiS_Pr);
+
+bool           SiS_SearchModeID(struct SiS_Private *SiS_Pr, unsigned short *ModeNo,
+                               unsigned short *ModeIdIndex);
+unsigned short SiS_GetModePtr(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+                               unsigned short ModeIdIndex);
+unsigned short  SiS_GetRefCRTVCLK(struct SiS_Private *SiS_Pr, unsigned short Index, int UseWide);
+unsigned short  SiS_GetRefCRT1CRTC(struct SiS_Private *SiS_Pr, unsigned short Index, int UseWide);
+unsigned short SiS_GetColorDepth(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+                               unsigned short ModeIdIndex);
+unsigned short SiS_GetOffset(struct SiS_Private *SiS_Pr,unsigned short ModeNo,
+                               unsigned short ModeIdIndex, unsigned short RRTI);
+#ifdef CONFIG_FB_SIS_300
+void           SiS_GetFIFOThresholdIndex300(struct SiS_Private *SiS_Pr, unsigned short *idx1,
+                               unsigned short *idx2);
+unsigned short SiS_GetFIFOThresholdB300(unsigned short idx1, unsigned short idx2);
+unsigned short SiS_GetLatencyFactor630(struct SiS_Private *SiS_Pr, unsigned short index);
+#endif
+void           SiS_LoadDAC(struct SiS_Private *SiS_Pr, unsigned short ModeNo, unsigned short ModeIdIndex);
+bool           SiSSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo);
+void           SiS_CalcCRRegisters(struct SiS_Private *SiS_Pr, int depth);
+void           SiS_CalcLCDACRT1Timing(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+                               unsigned short ModeIdIndex);
+void           SiS_Generic_ConvertCRData(struct SiS_Private *SiS_Pr, unsigned char *crdata, int xres,
+                               int yres, struct fb_var_screeninfo *var, bool writeres);
+
+/* From init301.c: */
+extern void            SiS_GetVBInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+                               unsigned short ModeIdIndex, int chkcrt2mode);
+extern void            SiS_GetLCDResInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+                               unsigned short ModeIdIndex);
+extern void            SiS_SetYPbPr(struct SiS_Private *SiS_Pr);
+extern void            SiS_SetTVMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+                               unsigned short ModeIdIndex);
+extern void            SiS_UnLockCRT2(struct SiS_Private *SiS_Pr);
+extern void            SiS_DisableBridge(struct SiS_Private *);
+extern bool            SiS_SetCRT2Group(struct SiS_Private *, unsigned short);
+extern unsigned short  SiS_GetRatePtr(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+                               unsigned short ModeIdIndex);
+extern void            SiS_WaitRetrace1(struct SiS_Private *SiS_Pr);
+extern unsigned short  SiS_GetResInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+                               unsigned short ModeIdIndex);
+extern unsigned short  SiS_GetCH700x(struct SiS_Private *SiS_Pr, unsigned short tempax);
+extern unsigned short  SiS_GetVCLK2Ptr(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+                               unsigned short ModeIdIndex, unsigned short RRTI);
+extern bool            SiS_IsVAMode(struct SiS_Private *);
+extern bool            SiS_IsDualEdge(struct SiS_Private *);
+
+#ifdef CONFIG_FB_SIS_300
+extern unsigned int    sisfb_read_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg);
+extern void            sisfb_write_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg,
+                               unsigned int val);
+#endif
+#ifdef CONFIG_FB_SIS_315
+extern void            sisfb_write_nbridge_pci_byte(struct SiS_Private *SiS_Pr, int reg,
+                               unsigned char val);
+extern unsigned int    sisfb_read_mio_pci_word(struct SiS_Private *SiS_Pr, int reg);
+#endif
+
+
 /* MMIO access macros */
 #define MMIO_IN8(base, offset)  readb((base+offset))
 #define MMIO_IN16(base, offset) readw((base+offset))
@@ -583,4 +663,55 @@ struct sis_video_info {
        struct sis_video_info *next;
 };
 
+/* from sis_accel.c */
+extern void    fbcon_sis_fillrect(struct fb_info *info,
+                               const struct fb_fillrect *rect);
+extern void    fbcon_sis_copyarea(struct fb_info *info,
+                               const struct fb_copyarea *area);
+extern int     fbcon_sis_sync(struct fb_info *info);
+
+/* Internal 2D accelerator functions */
+extern int     sisfb_initaccel(struct sis_video_info *ivideo);
+extern void    sisfb_syncaccel(struct sis_video_info *ivideo);
+
+/* Internal general routines */
+#ifdef CONFIG_FB_SIS_300
+unsigned int   sisfb_read_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg);
+void           sisfb_write_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg, unsigned int val);
+unsigned int   sisfb_read_lpc_pci_dword(struct SiS_Private *SiS_Pr, int reg);
+#endif
+#ifdef CONFIG_FB_SIS_315
+void           sisfb_write_nbridge_pci_byte(struct SiS_Private *SiS_Pr, int reg, unsigned char val);
+unsigned int   sisfb_read_mio_pci_word(struct SiS_Private *SiS_Pr, int reg);
+#endif
+
+/* SiS-specific exported functions */
+void                   sis_malloc(struct sis_memreq *req);
+void                   sis_malloc_new(struct pci_dev *pdev, struct sis_memreq *req);
+void                   sis_free(u32 base);
+void                   sis_free_new(struct pci_dev *pdev, u32 base);
+
+/* Routines from init.c/init301.c */
+extern unsigned short  SiS_GetModeID_LCD(int VGAEngine, unsigned int VBFlags, int HDisplay,
+                               int VDisplay, int Depth, bool FSTN, unsigned short CustomT,
+                               int LCDwith, int LCDheight, unsigned int VBFlags2);
+extern unsigned short  SiS_GetModeID_TV(int VGAEngine, unsigned int VBFlags, int HDisplay,
+                               int VDisplay, int Depth, unsigned int VBFlags2);
+extern unsigned short  SiS_GetModeID_VGA2(int VGAEngine, unsigned int VBFlags, int HDisplay,
+                               int VDisplay, int Depth, unsigned int VBFlags2);
+extern void            SiSRegInit(struct SiS_Private *SiS_Pr, SISIOADDRESS BaseAddr);
+extern bool            SiSSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo);
+extern void            SiS_SetEnableDstn(struct SiS_Private *SiS_Pr, int enable);
+extern void            SiS_SetEnableFstn(struct SiS_Private *SiS_Pr, int enable);
+
+extern bool            SiSDetermineROMLayout661(struct SiS_Private *SiS_Pr);
+
+extern bool            sisfb_gettotalfrommode(struct SiS_Private *SiS_Pr, unsigned char modeno,
+                               int *htotal, int *vtotal, unsigned char rateindex);
+extern int             sisfb_mode_rate_to_dclock(struct SiS_Private *SiS_Pr,
+                               unsigned char modeno, unsigned char rateindex);
+extern int             sisfb_mode_rate_to_ddata(struct SiS_Private *SiS_Pr, unsigned char modeno,
+                               unsigned char rateindex, struct fb_var_screeninfo *var);
+
+
 #endif
index ecdd054d89510d0d68281c37ac7eb9caa9bb2c8e..20aff90059781743c04804b9d6b0d2294788c8b0 100644 (file)
 
 #include "sis.h"
 #include "sis_main.h"
+#include "init301.h"
 
 #if !defined(CONFIG_FB_SIS_300) && !defined(CONFIG_FB_SIS_315)
 #warning Neither CONFIG_FB_SIS_300 nor CONFIG_FB_SIS_315 is set
 #warning sisfb will not work!
 #endif
 
+/* ---------------------- Prototypes ------------------------- */
+
+/* Interface used by the world */
+#ifndef MODULE
+static int sisfb_setup(char *options);
+#endif
+
+/* Interface to the low level console driver */
+static int sisfb_init(void);
+
+/* fbdev routines */
+static int     sisfb_get_fix(struct fb_fix_screeninfo *fix, int con,
+                               struct fb_info *info);
+
+static int     sisfb_ioctl(struct fb_info *info, unsigned int cmd,
+                           unsigned long arg);
+static int     sisfb_set_par(struct fb_info *info);
+static int     sisfb_blank(int blank,
+                               struct fb_info *info);
+
 static void sisfb_handle_command(struct sis_video_info *ivideo,
                                 struct sisfb_cmd *sisfb_command);
 
+static void    sisfb_search_mode(char *name, bool quiet);
+static int     sisfb_validate_mode(struct sis_video_info *ivideo, int modeindex, u32 vbflags);
+static u8      sisfb_search_refresh_rate(struct sis_video_info *ivideo, unsigned int rate,
+                               int index);
+static int     sisfb_setcolreg(unsigned regno, unsigned red, unsigned green,
+                               unsigned blue, unsigned transp,
+                               struct fb_info *fb_info);
+static int     sisfb_do_set_var(struct fb_var_screeninfo *var, int isactive,
+                               struct fb_info *info);
+static void    sisfb_pre_setmode(struct sis_video_info *ivideo);
+static void    sisfb_post_setmode(struct sis_video_info *ivideo);
+static bool    sisfb_CheckVBRetrace(struct sis_video_info *ivideo);
+static bool    sisfbcheckvretracecrt2(struct sis_video_info *ivideo);
+static bool    sisfbcheckvretracecrt1(struct sis_video_info *ivideo);
+static bool    sisfb_bridgeisslave(struct sis_video_info *ivideo);
+static void    sisfb_detect_VB_connect(struct sis_video_info *ivideo);
+static void    sisfb_get_VB_type(struct sis_video_info *ivideo);
+static void    sisfb_set_TVxposoffset(struct sis_video_info *ivideo, int val);
+static void    sisfb_set_TVyposoffset(struct sis_video_info *ivideo, int val);
+
+/* Internal heap routines */
+static int             sisfb_heap_init(struct sis_video_info *ivideo);
+static struct SIS_OH * sisfb_poh_new_node(struct SIS_HEAP *memheap);
+static struct SIS_OH * sisfb_poh_allocate(struct SIS_HEAP *memheap, u32 size);
+static void            sisfb_delete_node(struct SIS_OH *poh);
+static void            sisfb_insert_node(struct SIS_OH *pohList, struct SIS_OH *poh);
+static struct SIS_OH * sisfb_poh_free(struct SIS_HEAP *memheap, u32 base);
+static void            sisfb_free_node(struct SIS_HEAP *memheap, struct SIS_OH *poh);
+
+
 /* ------------------ Internal helper routines ----------------- */
 
 static void __init
index 32e23c20943056ec221c10f338cbdae964421742..d8ba07061f1ed4e9bfd113c2ad63299317defb37 100644 (file)
@@ -661,121 +661,4 @@ static struct _customttable {
        }
 };
 
-/* ---------------------- Prototypes ------------------------- */
-
-/* Interface used by the world */
-#ifndef MODULE
-static int sisfb_setup(char *options);
 #endif
-
-/* Interface to the low level console driver */
-static int sisfb_init(void);
-
-/* fbdev routines */
-static int     sisfb_get_fix(struct fb_fix_screeninfo *fix, int con,
-                               struct fb_info *info);
-
-static int     sisfb_ioctl(struct fb_info *info, unsigned int cmd,
-                           unsigned long arg);
-static int     sisfb_set_par(struct fb_info *info);
-static int     sisfb_blank(int blank,
-                               struct fb_info *info);
-extern void    fbcon_sis_fillrect(struct fb_info *info,
-                               const struct fb_fillrect *rect);
-extern void    fbcon_sis_copyarea(struct fb_info *info,
-                               const struct fb_copyarea *area);
-extern int     fbcon_sis_sync(struct fb_info *info);
-
-/* Internal 2D accelerator functions */
-extern int     sisfb_initaccel(struct sis_video_info *ivideo);
-extern void    sisfb_syncaccel(struct sis_video_info *ivideo);
-
-/* Internal general routines */
-static void    sisfb_search_mode(char *name, bool quiet);
-static int     sisfb_validate_mode(struct sis_video_info *ivideo, int modeindex, u32 vbflags);
-static u8      sisfb_search_refresh_rate(struct sis_video_info *ivideo, unsigned int rate,
-                               int index);
-static int     sisfb_setcolreg(unsigned regno, unsigned red, unsigned green,
-                               unsigned blue, unsigned transp,
-                               struct fb_info *fb_info);
-static int     sisfb_do_set_var(struct fb_var_screeninfo *var, int isactive,
-                               struct fb_info *info);
-static void    sisfb_pre_setmode(struct sis_video_info *ivideo);
-static void    sisfb_post_setmode(struct sis_video_info *ivideo);
-static bool    sisfb_CheckVBRetrace(struct sis_video_info *ivideo);
-static bool    sisfbcheckvretracecrt2(struct sis_video_info *ivideo);
-static bool    sisfbcheckvretracecrt1(struct sis_video_info *ivideo);
-static bool    sisfb_bridgeisslave(struct sis_video_info *ivideo);
-static void    sisfb_detect_VB_connect(struct sis_video_info *ivideo);
-static void    sisfb_get_VB_type(struct sis_video_info *ivideo);
-static void    sisfb_set_TVxposoffset(struct sis_video_info *ivideo, int val);
-static void    sisfb_set_TVyposoffset(struct sis_video_info *ivideo, int val);
-#ifdef CONFIG_FB_SIS_300
-unsigned int   sisfb_read_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg);
-void           sisfb_write_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg, unsigned int val);
-unsigned int   sisfb_read_lpc_pci_dword(struct SiS_Private *SiS_Pr, int reg);
-#endif
-#ifdef CONFIG_FB_SIS_315
-void           sisfb_write_nbridge_pci_byte(struct SiS_Private *SiS_Pr, int reg, unsigned char val);
-unsigned int   sisfb_read_mio_pci_word(struct SiS_Private *SiS_Pr, int reg);
-#endif
-
-/* SiS-specific exported functions */
-void                   sis_malloc(struct sis_memreq *req);
-void                   sis_malloc_new(struct pci_dev *pdev, struct sis_memreq *req);
-void                   sis_free(u32 base);
-void                   sis_free_new(struct pci_dev *pdev, u32 base);
-
-/* Internal heap routines */
-static int             sisfb_heap_init(struct sis_video_info *ivideo);
-static struct SIS_OH * sisfb_poh_new_node(struct SIS_HEAP *memheap);
-static struct SIS_OH * sisfb_poh_allocate(struct SIS_HEAP *memheap, u32 size);
-static void            sisfb_delete_node(struct SIS_OH *poh);
-static void            sisfb_insert_node(struct SIS_OH *pohList, struct SIS_OH *poh);
-static struct SIS_OH * sisfb_poh_free(struct SIS_HEAP *memheap, u32 base);
-static void            sisfb_free_node(struct SIS_HEAP *memheap, struct SIS_OH *poh);
-
-/* Routines from init.c/init301.c */
-extern unsigned short  SiS_GetModeID_LCD(int VGAEngine, unsigned int VBFlags, int HDisplay,
-                               int VDisplay, int Depth, bool FSTN, unsigned short CustomT,
-                               int LCDwith, int LCDheight, unsigned int VBFlags2);
-extern unsigned short  SiS_GetModeID_TV(int VGAEngine, unsigned int VBFlags, int HDisplay,
-                               int VDisplay, int Depth, unsigned int VBFlags2);
-extern unsigned short  SiS_GetModeID_VGA2(int VGAEngine, unsigned int VBFlags, int HDisplay,
-                               int VDisplay, int Depth, unsigned int VBFlags2);
-extern void            SiSRegInit(struct SiS_Private *SiS_Pr, SISIOADDRESS BaseAddr);
-extern bool            SiSSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo);
-extern void            SiS_SetEnableDstn(struct SiS_Private *SiS_Pr, int enable);
-extern void            SiS_SetEnableFstn(struct SiS_Private *SiS_Pr, int enable);
-
-extern bool            SiSDetermineROMLayout661(struct SiS_Private *SiS_Pr);
-
-extern bool            sisfb_gettotalfrommode(struct SiS_Private *SiS_Pr, unsigned char modeno,
-                               int *htotal, int *vtotal, unsigned char rateindex);
-extern int             sisfb_mode_rate_to_dclock(struct SiS_Private *SiS_Pr,
-                               unsigned char modeno, unsigned char rateindex);
-extern int             sisfb_mode_rate_to_ddata(struct SiS_Private *SiS_Pr, unsigned char modeno,
-                               unsigned char rateindex, struct fb_var_screeninfo *var);
-
-/* Chrontel TV, DDC and DPMS functions */
-extern unsigned short  SiS_GetCH700x(struct SiS_Private *SiS_Pr, unsigned short reg);
-extern void            SiS_SetCH700x(struct SiS_Private *SiS_Pr, unsigned short reg, unsigned char val);
-extern unsigned short  SiS_GetCH701x(struct SiS_Private *SiS_Pr, unsigned short reg);
-extern void            SiS_SetCH701x(struct SiS_Private *SiS_Pr, unsigned short reg, unsigned char val);
-extern void            SiS_SetCH70xxANDOR(struct SiS_Private *SiS_Pr, unsigned short reg,
-                               unsigned char myor, unsigned char myand);
-extern void            SiS_DDC2Delay(struct SiS_Private *SiS_Pr, unsigned int delaytime);
-extern void            SiS_SetChrontelGPIO(struct SiS_Private *SiS_Pr, unsigned short myvbinfo);
-extern unsigned short  SiS_HandleDDC(struct SiS_Private *SiS_Pr, unsigned int VBFlags, int VGAEngine,
-                               unsigned short adaptnum, unsigned short DDCdatatype, unsigned char *buffer,
-                               unsigned int VBFlags2);
-extern unsigned short  SiS_ReadDDC1Bit(struct SiS_Private *SiS_Pr);
-#ifdef CONFIG_FB_SIS_315
-extern void            SiS_Chrontel701xBLOn(struct SiS_Private *SiS_Pr);
-extern void            SiS_Chrontel701xBLOff(struct SiS_Private *SiS_Pr);
-#endif
-extern void            SiS_SiS30xBLOn(struct SiS_Private *SiS_Pr);
-extern void            SiS_SiS30xBLOff(struct SiS_Private *SiS_Pr);
-#endif
-
-
index 8db7085e5d1aa345df4a842f7698c3800d3918ab..22b606af0a875bc6fc3c59c33f5c6e953fff7d92 100644 (file)
@@ -1293,7 +1293,6 @@ static struct fb_ops ufx_ops = {
  * Assumes no active clients have framebuffer open */
 static int ufx_realloc_framebuffer(struct ufx_data *dev, struct fb_info *info)
 {
-       int retval = -ENOMEM;
        int old_len = info->fix.smem_len;
        int new_len;
        unsigned char *old_fb = info->screen_base;
@@ -1308,10 +1307,8 @@ static int ufx_realloc_framebuffer(struct ufx_data *dev, struct fb_info *info)
                 * Alloc system memory for virtual framebuffer
                 */
                new_fb = vmalloc(new_len);
-               if (!new_fb) {
-                       pr_err("Virtual framebuffer alloc failed");
-                       goto error;
-               }
+               if (!new_fb)
+                       return -ENOMEM;
 
                if (info->screen_base) {
                        memcpy(new_fb, old_fb, old_len);
@@ -1323,11 +1320,7 @@ static int ufx_realloc_framebuffer(struct ufx_data *dev, struct fb_info *info)
                info->fix.smem_start = (unsigned long) new_fb;
                info->flags = smscufx_info_flags;
        }
-
-       retval = 0;
-
-error:
-       return retval;
+       return 0;
 }
 
 /* sets up I2C Controller for 100 Kbps, std. speed, 7-bit addr, master,
@@ -1620,8 +1613,8 @@ static int ufx_usb_probe(struct usb_interface *interface,
 {
        struct usb_device *usbdev;
        struct ufx_data *dev;
-       struct fb_info *info = NULL;
-       int retval = -ENOMEM;
+       struct fb_info *info;
+       int retval;
        u32 id_rev, fpga_rev;
 
        /* usb initialization */
@@ -1631,7 +1624,7 @@ static int ufx_usb_probe(struct usb_interface *interface,
        dev = kzalloc(sizeof(*dev), GFP_KERNEL);
        if (dev == NULL) {
                dev_err(&usbdev->dev, "ufx_usb_probe: failed alloc of dev struct\n");
-               goto error;
+               return -ENOMEM;
        }
 
        /* we need to wait for both usb and fbdev to spin down on disconnect */
@@ -1652,9 +1645,8 @@ static int ufx_usb_probe(struct usb_interface *interface,
        dev_dbg(dev->gdev, "fb_defio enable=%d\n", fb_defio);
 
        if (!ufx_alloc_urb_list(dev, WRITES_IN_FLIGHT, MAX_TRANSFER)) {
-               retval = -ENOMEM;
                dev_err(dev->gdev, "ufx_alloc_urb_list failed\n");
-               goto error;
+               goto e_nomem;
        }
 
        /* We don't register a new USB class. Our client interface is fbdev */
@@ -1662,9 +1654,8 @@ static int ufx_usb_probe(struct usb_interface *interface,
        /* allocates framebuffer driver structure, not framebuffer memory */
        info = framebuffer_alloc(0, &usbdev->dev);
        if (!info) {
-               retval = -ENOMEM;
                dev_err(dev->gdev, "framebuffer_alloc failed\n");
-               goto error;
+               goto e_nomem;
        }
 
        dev->info = info;
@@ -1675,7 +1666,7 @@ static int ufx_usb_probe(struct usb_interface *interface,
        retval = fb_alloc_cmap(&info->cmap, 256, 0);
        if (retval < 0) {
                dev_err(dev->gdev, "fb_alloc_cmap failed %x\n", retval);
-               goto error;
+               goto destroy_modedb;
        }
 
        INIT_DELAYED_WORK(&dev->free_framebuffer_work,
@@ -1736,26 +1727,20 @@ static int ufx_usb_probe(struct usb_interface *interface,
        return 0;
 
 error:
-       if (dev) {
-               if (info) {
-                       if (info->cmap.len != 0)
-                               fb_dealloc_cmap(&info->cmap);
-                       if (info->monspecs.modedb)
-                               fb_destroy_modedb(info->monspecs.modedb);
-                       vfree(info->screen_base);
-
-                       fb_destroy_modelist(&info->modelist);
-
-                       framebuffer_release(info);
-               }
-
-               kref_put(&dev->kref, ufx_free); /* ref for framebuffer */
-               kref_put(&dev->kref, ufx_free); /* last ref from kref_init */
-
-               /* dev has been deallocated. Do not dereference */
-       }
-
+       fb_dealloc_cmap(&info->cmap);
+destroy_modedb:
+       fb_destroy_modedb(info->monspecs.modedb);
+       vfree(info->screen_base);
+       fb_destroy_modelist(&info->modelist);
+       framebuffer_release(info);
+put_ref:
+       kref_put(&dev->kref, ufx_free); /* ref for framebuffer */
+       kref_put(&dev->kref, ufx_free); /* last ref from kref_init */
        return retval;
+
+e_nomem:
+       retval = -ENOMEM;
+       goto put_ref;
 }
 
 static void ufx_usb_disconnect(struct usb_interface *interface)
index f599520374ddf575bba1236b81bec2c4c2d21c49..6439231f2db22ec13a227ed009598d58cadd737f 100644 (file)
@@ -628,7 +628,8 @@ static int ssd1307fb_probe(struct i2c_client *client,
                goto fb_alloc_error;
        }
 
-       ssd1307fb_defio = devm_kzalloc(&client->dev, sizeof(struct fb_deferred_io), GFP_KERNEL);
+       ssd1307fb_defio = devm_kzalloc(&client->dev, sizeof(*ssd1307fb_defio),
+                                      GFP_KERNEL);
        if (!ssd1307fb_defio) {
                dev_err(&client->dev, "Couldn't allocate deferred io.\n");
                ret = -ENOMEM;
index 3c2e4cabc08f8f6d006f15d447f367bdb369978a..045e8afe398be35866adb64d774c0ebf0dd9834c 100644 (file)
@@ -1126,10 +1126,8 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref)
        int bpp, xres, yres;
 
        fb = kzalloc(sizeof(*fb), GFP_ATOMIC);
-       if (!fb) {
-               printk(KERN_ERR "stifb: Could not allocate stifb structure\n");
-               return -ENODEV;
-       }
+       if (!fb)
+               return -ENOMEM;
        
        info = &fb->info;
 
index 452a4207ac1bec114c1ec7c5c70a85a8ef8e1958..f365d4862015321e9ad8f2efdc072877e505a409 100644 (file)
@@ -428,7 +428,6 @@ static void dlfb_compress_hline(
        const uint16_t *pixel = *pixel_start_ptr;
        uint32_t dev_addr  = *device_address_ptr;
        uint8_t *cmd = *command_buffer_ptr;
-       const int bpp = 2;
 
        while ((pixel_end > pixel) &&
               (cmd_buffer_end - MIN_RLX_CMD_BYTES > cmd)) {
@@ -441,9 +440,9 @@ static void dlfb_compress_hline(
 
                *cmd++ = 0xAF;
                *cmd++ = 0x6B;
-               *cmd++ = (uint8_t) ((dev_addr >> 16) & 0xFF);
-               *cmd++ = (uint8_t) ((dev_addr >> 8) & 0xFF);
-               *cmd++ = (uint8_t) ((dev_addr) & 0xFF);
+               *cmd++ = dev_addr >> 16;
+               *cmd++ = dev_addr >> 8;
+               *cmd++ = dev_addr;
 
                cmd_pixels_count_byte = cmd++; /*  we'll know this later */
                cmd_pixel_start = pixel;
@@ -453,15 +452,15 @@ static void dlfb_compress_hline(
 
                cmd_pixel_end = pixel + min(MAX_CMD_PIXELS + 1,
                        min((int)(pixel_end - pixel),
-                           (int)(cmd_buffer_end - cmd) / bpp));
+                           (int)(cmd_buffer_end - cmd) / BPP));
 
-               prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp);
+               prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * BPP);
 
                while (pixel < cmd_pixel_end) {
                        const uint16_t * const repeating_pixel = pixel;
 
-                       *(uint16_t *)cmd = cpu_to_be16p(pixel);
-                       cmd += 2;
+                       *cmd++ = *pixel >> 8;
+                       *cmd++ = *pixel;
                        pixel++;
 
                        if (unlikely((pixel < cmd_pixel_end) &&
@@ -490,7 +489,7 @@ static void dlfb_compress_hline(
                }
 
                *cmd_pixels_count_byte = (pixel - cmd_pixel_start) & 0xFF;
-               dev_addr += (pixel - cmd_pixel_start) * bpp;
+               dev_addr += (pixel - cmd_pixel_start) * BPP;
        }
 
        if (cmd_buffer_end <= MIN_RLX_CMD_BYTES + cmd) {
@@ -1136,7 +1135,6 @@ static struct fb_ops dlfb_ops = {
  */
 static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info)
 {
-       int retval = -ENOMEM;
        int old_len = info->fix.smem_len;
        int new_len;
        unsigned char *old_fb = info->screen_base;
@@ -1152,7 +1150,7 @@ static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info
                new_fb = vmalloc(new_len);
                if (!new_fb) {
                        dev_err(info->dev, "Virtual framebuffer alloc failed\n");
-                       goto error;
+                       return -ENOMEM;
                }
 
                if (info->screen_base) {
@@ -1181,11 +1179,7 @@ static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info
                        dlfb->backing_buffer = new_back;
                }
        }
-
-       retval = 0;
-
-error:
-       return retval;
+       return 0;
 }
 
 /*
@@ -1531,15 +1525,16 @@ static int dlfb_parse_vendor_descriptor(struct dlfb_data *dlfb,
                        u8 length;
                        u16 key;
 
-                       key = le16_to_cpu(*((u16 *) desc));
-                       desc += sizeof(u16);
-                       length = *desc;
-                       desc++;
+                       key = *desc++;
+                       key |= (u16)*desc++ << 8;
+                       length = *desc++;
 
                        switch (key) {
                        case 0x0200: { /* max_area */
-                               u32 max_area;
-                               max_area = le32_to_cpu(*((u32 *)desc));
+                               u32 max_area = *desc++;
+                               max_area |= (u32)*desc++ << 8;
+                               max_area |= (u32)*desc++ << 16;
+                               max_area |= (u32)*desc++ << 24;
                                dev_warn(&intf->dev,
                                         "DL chip limited to %d pixel modes\n",
                                         max_area);
index 6f8d444eb0e3e90e8e0f59f94400a051381b2639..5172fa5811476c742d8e1efc638b591f3dbe0e03 100644 (file)
@@ -651,7 +651,7 @@ static int vmlfb_check_var_locked(struct fb_var_screeninfo *var,
        }
 
        pitch = ALIGN((var->xres * var->bits_per_pixel) >> 3, 0x40);
-       mem = pitch * var->yres_virtual;
+       mem = (u64)pitch * var->yres_virtual;
        if (mem > vinfo->vram_contig_size) {
                return -ENOMEM;
        }
index ca1b35f033b10c8be52ce244245e27918a763378..c27f62c2c75acc9360a203779583fc50a1dc352f 100644 (file)
@@ -36,7 +36,7 @@ static void probe(struct via_aux_bus *bus, u8 addr)
                .name   =       name};
        /* check vendor id and device id */
        const u8 id[] = {0x01, 0x00, 0x06, 0x00}, len = ARRAY_SIZE(id);
-       u8 tmp[len];
+       u8 tmp[ARRAY_SIZE(id)];
 
        if (!via_aux_read(&drv, 0x00, tmp, len) || memcmp(id, tmp, len))
                return;
index 06e742f1f72362cab7373656f098592f222a560b..32978a0ccfd72fb1d64c67d72578e902c6e6bd10 100644 (file)
@@ -36,7 +36,7 @@ void via_aux_vt1631_probe(struct via_aux_bus *bus)
                .name   =       name};
        /* check vendor id and device id */
        const u8 id[] = {0x06, 0x11, 0x91, 0x31}, len = ARRAY_SIZE(id);
-       u8 tmp[len];
+       u8 tmp[ARRAY_SIZE(id)];
 
        if (!via_aux_read(&drv, 0x00, tmp, len) || memcmp(id, tmp, len))
                return;
index d24f4cd97401703c667c8c0704e8ff959cd89dc3..cec8cc43d524099e72d3ad737656c30f0b0cbf81 100644 (file)
@@ -36,7 +36,7 @@ static void probe(struct via_aux_bus *bus, u8 addr)
                .name   =       name};
        /* check vendor id and device id */
        const u8 id[] = {0x06, 0x11, 0x92, 0x31}, len = ARRAY_SIZE(id);
-       u8 tmp[len];
+       u8 tmp[ARRAY_SIZE(id)];
 
        if (!via_aux_read(&drv, 0x00, tmp, len) || memcmp(id, tmp, len))
                return;
index 9e015c101d4db7c6e3af198e6c00327a9a780515..2b10bc21ab79e80d525e11513c7f09aa268b2b8f 100644 (file)
@@ -36,7 +36,7 @@ void via_aux_vt1636_probe(struct via_aux_bus *bus)
                .name   =       name};
        /* check vendor id and device id */
        const u8 id[] = {0x06, 0x11, 0x45, 0x33}, len = ARRAY_SIZE(id);
-       u8 tmp[len];
+       u8 tmp[ARRAY_SIZE(id)];
 
        if (!via_aux_read(&drv, 0x00, tmp, len) || memcmp(id, tmp, len))
                return;
index 8ce0a99bf17ccf0fcb549639ff530dbfd5e0471c..83b8963c9657c6a2ff83bb24bbc6f56e1bf6baad 100644 (file)
@@ -244,23 +244,3 @@ dispfail:
        return NULL;
 }
 EXPORT_SYMBOL_GPL(of_get_display_timings);
-
-/**
- * of_display_timings_exist - check if a display-timings node is provided
- * @np: device_node with the timing
- **/
-int of_display_timings_exist(const struct device_node *np)
-{
-       struct device_node *timings_np;
-
-       if (!np)
-               return -EINVAL;
-
-       timings_np = of_parse_phandle(np, "display-timings", 0);
-       if (!timings_np)
-               return -EINVAL;
-
-       of_node_put(timings_np);
-       return 1;
-}
-EXPORT_SYMBOL_GPL(of_display_timings_exist);
index dfe5684000beb4b3fadd9581712268d7d2715c17..6b237e3f4983046cdc8327b75c173952fbc23f86 100644 (file)
@@ -272,6 +272,12 @@ static unsigned int update_balloon_stats(struct virtio_balloon *vb)
                                pages_to_bytes(events[PSWPOUT]));
        update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
        update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
+#ifdef CONFIG_HUGETLB_PAGE
+       update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC,
+                   events[HTLB_BUDDY_PGALLOC]);
+       update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGFAIL,
+                   events[HTLB_BUDDY_PGALLOC_FAIL]);
+#endif
 #endif
        update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE,
                                pages_to_bytes(i.freeram));
index 23e391d3ec015d0c5b38b21619898c282826f59c..b29f4e40851f0cab2894450e61352285fd28b9f4 100644 (file)
@@ -53,6 +53,8 @@ static unsigned long *acpi_ids_done;
 static unsigned long *acpi_id_present;
 /* And if there is an _CST definition (or a PBLK) for the ACPI IDs */
 static unsigned long *acpi_id_cst_present;
+/* Which ACPI P-State dependencies for an enumerated processor */
+static struct acpi_psd_package *acpi_psd;
 
 static int push_cxx_to_hypervisor(struct acpi_processor *_pr)
 {
@@ -362,9 +364,9 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
        }
        /* There are more ACPI Processor objects than in x2APIC or MADT.
         * This can happen with incorrect ACPI SSDT declerations. */
-       if (acpi_id > nr_acpi_bits) {
-               pr_debug("We only have %u, trying to set %u\n",
-                        nr_acpi_bits, acpi_id);
+       if (acpi_id >= nr_acpi_bits) {
+               pr_debug("max acpi id %u, trying to set %u\n",
+                        nr_acpi_bits - 1, acpi_id);
                return AE_OK;
        }
        /* OK, There is a ACPI Processor object */
@@ -372,6 +374,13 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
 
        pr_debug("ACPI CPU%u w/ PBLK:0x%lx\n", acpi_id, (unsigned long)pblk);
 
+       /* It has P-state dependencies */
+       if (!acpi_processor_get_psd(handle, &acpi_psd[acpi_id])) {
+               pr_debug("ACPI CPU%u w/ PST:coord_type = %llu domain = %llu\n",
+                        acpi_id, acpi_psd[acpi_id].coord_type,
+                        acpi_psd[acpi_id].domain);
+       }
+
        status = acpi_evaluate_object(handle, "_CST", NULL, &buffer);
        if (ACPI_FAILURE(status)) {
                if (!pblk)
@@ -405,6 +414,14 @@ static int check_acpi_ids(struct acpi_processor *pr_backup)
                return -ENOMEM;
        }
 
+       acpi_psd = kcalloc(nr_acpi_bits, sizeof(struct acpi_psd_package),
+                          GFP_KERNEL);
+       if (!acpi_psd) {
+               kfree(acpi_id_present);
+               kfree(acpi_id_cst_present);
+               return -ENOMEM;
+       }
+
        acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
                            ACPI_UINT32_MAX,
                            read_acpi_id, NULL, NULL, NULL);
@@ -417,6 +434,12 @@ upload:
                        pr_backup->acpi_id = i;
                        /* Mask out C-states if there are no _CST or PBLK */
                        pr_backup->flags.power = test_bit(i, acpi_id_cst_present);
+                       /* num_entries is non-zero if we evaluated _PSD */
+                       if (acpi_psd[i].num_entries) {
+                               memcpy(&pr_backup->performance->domain_info,
+                                      &acpi_psd[i],
+                                      sizeof(struct acpi_psd_package));
+                       }
                        (void)upload_pm_data(pr_backup);
                }
        }
@@ -566,6 +589,7 @@ static void __exit xen_acpi_processor_exit(void)
        kfree(acpi_ids_done);
        kfree(acpi_id_present);
        kfree(acpi_id_cst_present);
+       kfree(acpi_psd);
        for_each_possible_cpu(i)
                acpi_processor_unregister_performance(i);
 
index a493e99bed2131a33afc65e50110f5e0ff6d5244..0d6d9264d6a9ec47c5af4f0be618bdb51a7f0c7e 100644 (file)
@@ -365,7 +365,7 @@ void xenbus_dev_queue_reply(struct xb_req_data *req)
                        if (WARN_ON(rc))
                                goto out;
                }
-       } else if (req->msg.type == XS_TRANSACTION_END) {
+       } else if (req->type == XS_TRANSACTION_END) {
                trans = xenbus_get_transaction(u, req->msg.tx_id);
                if (WARN_ON(!trans))
                        goto out;
@@ -429,6 +429,10 @@ static int xenbus_write_transaction(unsigned msg_type,
 {
        int rc;
        struct xenbus_transaction_holder *trans = NULL;
+       struct {
+               struct xsd_sockmsg hdr;
+               char body[];
+       } *msg = (void *)u->u.buffer;
 
        if (msg_type == XS_TRANSACTION_START) {
                trans = kzalloc(sizeof(*trans), GFP_KERNEL);
@@ -437,11 +441,15 @@ static int xenbus_write_transaction(unsigned msg_type,
                        goto out;
                }
                list_add(&trans->list, &u->transactions);
-       } else if (u->u.msg.tx_id != 0 &&
-                  !xenbus_get_transaction(u, u->u.msg.tx_id))
+       } else if (msg->hdr.tx_id != 0 &&
+                  !xenbus_get_transaction(u, msg->hdr.tx_id))
                return xenbus_command_reply(u, XS_ERROR, "ENOENT");
+       else if (msg_type == XS_TRANSACTION_END &&
+                !(msg->hdr.len == 2 &&
+                  (!strcmp(msg->body, "T") || !strcmp(msg->body, "F"))))
+               return xenbus_command_reply(u, XS_ERROR, "EINVAL");
 
-       rc = xenbus_dev_request_and_reply(&u->u.msg, u);
+       rc = xenbus_dev_request_and_reply(&msg->hdr, u);
        if (rc && trans) {
                list_del(&trans->list);
                kfree(trans);
index 3f3b29398ab8e2b711ce724cf756ea8c241433cb..49a3874ae6bb45e2c5031fa183a252d78e8d882f 100644 (file)
@@ -140,7 +140,9 @@ void xs_request_exit(struct xb_req_data *req)
        spin_lock(&xs_state_lock);
        xs_state_users--;
        if ((req->type == XS_TRANSACTION_START && req->msg.type == XS_ERROR) ||
-           req->type == XS_TRANSACTION_END)
+           (req->type == XS_TRANSACTION_END &&
+            !WARN_ON_ONCE(req->msg.type == XS_ERROR &&
+                          !strcmp(req->body, "ENOENT"))))
                xs_state_users--;
        spin_unlock(&xs_state_lock);
 
index 160f6cc26c008a1c906211288178fb9176e5066c..c164698dc30481156eb9738e0c3f1b91d5ab5108 100644 (file)
@@ -585,10 +585,11 @@ static int afs_writepages_region(struct address_space *mapping,
 
                _debug("wback %lx", page->index);
 
-               /* at this point we hold neither mapping->tree_lock nor lock on
-                * the page itself: the page may be truncated or invalidated
-                * (changing page->mapping to NULL), or even swizzled back from
-                * swapper_space to tmpfs file mapping
+               /*
+                * at this point we hold neither the i_pages lock nor the
+                * page lock: the page may be truncated or invalidated
+                * (changing page->mapping to NULL), or even swizzled
+                * back from swapper_space to tmpfs file mapping
                 */
                ret = lock_page_killable(page);
                if (ret < 0) {
index a0c57c37fa21917826cfd85f59b8c929a75c8f6d..be9c3dc048abf91b24d74bb0d839c848a9afcdf6 100644 (file)
@@ -19,9 +19,6 @@
  */
 static autofs_wqt_t autofs4_next_wait_queue = 1;
 
-/* These are the signals we allow interrupting a pending mount */
-#define SHUTDOWN_SIGS  (sigmask(SIGKILL) | sigmask(SIGINT) | sigmask(SIGQUIT))
-
 void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
 {
        struct autofs_wait_queue *wq, *nwq;
@@ -486,29 +483,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
         * wq->name.name is NULL iff the lock is already released
         * or the mount has been made catatonic.
         */
-       if (wq->name.name) {
-               /* Block all but "shutdown" signals while waiting */
-               unsigned long shutdown_sigs_mask;
-               unsigned long irqflags;
-               sigset_t oldset;
-
-               spin_lock_irqsave(&current->sighand->siglock, irqflags);
-               oldset = current->blocked;
-               shutdown_sigs_mask = SHUTDOWN_SIGS & ~oldset.sig[0];
-               siginitsetinv(&current->blocked, shutdown_sigs_mask);
-               recalc_sigpending();
-               spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
-
-               wait_event_interruptible(wq->queue, wq->name.name == NULL);
-
-               spin_lock_irqsave(&current->sighand->siglock, irqflags);
-               current->blocked = oldset;
-               recalc_sigpending();
-               spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
-       } else {
-               pr_debug("skipped sleeping\n");
-       }
-
+       wait_event_killable(wq->queue, wq->name.name == NULL);
        status = wq->status;
 
        /*
@@ -574,7 +549,7 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
        kfree(wq->name.name);
        wq->name.name = NULL;   /* Do not wait on this queue */
        wq->status = status;
-       wake_up_interruptible(&wq->queue);
+       wake_up(&wq->queue);
        if (!--wq->wait_ctr)
                kfree(wq);
        mutex_unlock(&sbi->wq_mutex);
index ce1824f47ba6a32222e4df6cea90099de101b9f8..c3deb2e35f2030a43fb15a1d918d7bcf01ca1fe4 100644 (file)
@@ -330,6 +330,7 @@ beyond_if:
 #ifdef __alpha__
        regs->gp = ex.a_gpvalue;
 #endif
+       finalize_exec(bprm);
        start_thread(regs, ex.a_entry, current->mm->start_stack);
        return 0;
 }
index bdb201230bae93c408cf1972b6f9bb92ad37e478..41e04183e4ce84a38e18a3997eb6708c3e8234f6 100644 (file)
@@ -377,6 +377,11 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);
 
+       if ((type & MAP_FIXED_NOREPLACE) && BAD_ADDR(map_addr))
+               pr_info("%d (%s): Uhuuh, elf segment at %p requested but the memory is mapped already\n",
+                               task_pid_nr(current), current->comm,
+                               (void *)addr);
+
        return(map_addr);
 }
 
@@ -575,7 +580,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
-                               elf_type |= MAP_FIXED;
+                               elf_type |= MAP_FIXED_NOREPLACE;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;
 
@@ -890,7 +895,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
           the correct location in memory. */
        for(i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
-               int elf_prot = 0, elf_flags;
+               int elf_prot = 0, elf_flags, elf_fixed = MAP_FIXED_NOREPLACE;
                unsigned long k, vaddr;
                unsigned long total_size = 0;
 
@@ -922,6 +927,13 @@ static int load_elf_binary(struct linux_binprm *bprm)
                                         */
                                }
                        }
+
+                       /*
+                        * Some binaries have overlapping elf segments and then
+                        * we have to forcefully map over an existing mapping
+                        * e.g. over this newly established brk mapping.
+                        */
+                       elf_fixed = MAP_FIXED;
                }
 
                if (elf_ppnt->p_flags & PF_R)
@@ -939,7 +951,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
                 * the ET_DYN load_addr calculations, proceed normally.
                 */
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
-                       elf_flags |= MAP_FIXED;
+                       elf_flags |= elf_fixed;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /*
                         * This logic is run once for the first LOAD Program
@@ -975,7 +987,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
                                load_bias = ELF_ET_DYN_BASE;
                                if (current->flags & PF_RANDOMIZE)
                                        load_bias += arch_mmap_rnd();
-                               elf_flags |= MAP_FIXED;
+                               elf_flags |= elf_fixed;
                        } else
                                load_bias = 0;
 
@@ -1155,6 +1167,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
        ELF_PLAT_INIT(regs, reloc_func_desc);
 #endif
 
+       finalize_exec(bprm);
        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
 out:
@@ -1234,7 +1247,7 @@ static int load_elf_library(struct file *file)
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
-                       MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
+                       MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
index 429326b6e2e7088dc1c67c770122beb7b004152b..d90993adeffa3d19186bcb50aeb85110bf971a7f 100644 (file)
@@ -463,6 +463,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
                            dynaddr);
 #endif
 
+       finalize_exec(bprm);
        /* everything is now ready... get the userspace context ready to roll */
        entryaddr = interp_params.entry_addr ?: exec_params.entry_addr;
        start_thread(regs, entryaddr, current->mm->start_stack);
index 5d6b94475f272629dc3f8c252749aff08790766d..82a48e8300181423bd589e52e673c2576c3da32d 100644 (file)
@@ -994,6 +994,7 @@ static int load_flat_binary(struct linux_binprm *bprm)
        FLAT_PLAT_INIT(regs);
 #endif
 
+       finalize_exec(bprm);
        pr_debug("start_thread(regs=0x%p, entry=0x%lx, start_stack=0x%lx)\n",
                 regs, start_addr, current->mm->start_stack);
        start_thread(regs, start_addr, current->mm->start_stack);
index 7a506c55a9935e3a3220fb87970a890ccec061a6..7ec920e2706566956759fa27514396a7bfb4afd7 100644 (file)
@@ -1948,11 +1948,6 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
 static int blkdev_writepages(struct address_space *mapping,
                             struct writeback_control *wbc)
 {
-       if (dax_mapping(mapping)) {
-               struct block_device *bdev = I_BDEV(mapping->host);
-
-               return dax_writeback_mapping_range(mapping, bdev, wbc);
-       }
        return generic_writepages(mapping, wbc);
 }
 
index 562c3e633403d482efd5a90e00e07b25c78d3b2a..578181cd96b5374ad808df5d00c72684b1dbbef3 100644 (file)
@@ -458,7 +458,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
                        break;
 
                rcu_read_lock();
-               page = radix_tree_lookup(&mapping->page_tree, pg_index);
+               page = radix_tree_lookup(&mapping->i_pages, pg_index);
                rcu_read_unlock();
                if (page && !radix_tree_exceptional_entry(page)) {
                        misses++;
index 47a8fe9d22e890bddd2d3df7bf9abb027db4647a..cf87976e389d0c1cded8a4452938320a1eb21153 100644 (file)
@@ -3963,11 +3963,11 @@ retry:
 
                        done_index = page->index;
                        /*
-                        * At this point we hold neither mapping->tree_lock nor
-                        * lock on the page itself: the page may be truncated or
-                        * invalidated (changing page->mapping to NULL), or even
-                        * swizzled back from swapper_space to tmpfs file
-                        * mapping
+                        * At this point we hold neither the i_pages lock nor
+                        * the page lock: the page may be truncated or
+                        * invalidated (changing page->mapping to NULL),
+                        * or even swizzled back from swapper_space to
+                        * tmpfs file mapping
                         */
                        if (!trylock_page(page)) {
                                flush_write_bio(epd);
@@ -5174,13 +5174,13 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb)
                WARN_ON(!PagePrivate(page));
 
                clear_page_dirty_for_io(page);
-               spin_lock_irq(&page->mapping->tree_lock);
+               xa_lock_irq(&page->mapping->i_pages);
                if (!PageDirty(page)) {
-                       radix_tree_tag_clear(&page->mapping->page_tree,
+                       radix_tree_tag_clear(&page->mapping->i_pages,
                                                page_index(page),
                                                PAGECACHE_TAG_DIRTY);
                }
-               spin_unlock_irq(&page->mapping->tree_lock);
+               xa_unlock_irq(&page->mapping->i_pages);
                ClearPageError(page);
                unlock_page(page);
        }
index ec5dd39071e6a308c0c6eefea15d13db3bd6576d..f3491074b035391531ff3d47855bc26b6b3dc86b 100644 (file)
@@ -185,10 +185,9 @@ EXPORT_SYMBOL(end_buffer_write_sync);
  * we get exclusion from try_to_free_buffers with the blockdev mapping's
  * private_lock.
  *
- * Hack idea: for the blockdev mapping, i_bufferlist_lock contention
+ * Hack idea: for the blockdev mapping, private_lock contention
  * may be quite high.  This code could TryLock the page, and if that
- * succeeds, there is no need to take private_lock. (But if
- * private_lock is contended then so is mapping->tree_lock).
+ * succeeds, there is no need to take private_lock.
  */
 static struct buffer_head *
 __find_get_block_slow(struct block_device *bdev, sector_t block)
@@ -594,20 +593,21 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
  *
  * The caller must hold lock_page_memcg().
  */
-static void __set_page_dirty(struct page *page, struct address_space *mapping,
+void __set_page_dirty(struct page *page, struct address_space *mapping,
                             int warn)
 {
        unsigned long flags;
 
-       spin_lock_irqsave(&mapping->tree_lock, flags);
+       xa_lock_irqsave(&mapping->i_pages, flags);
        if (page->mapping) {    /* Race with truncate? */
                WARN_ON_ONCE(warn && !PageUptodate(page));
                account_page_dirtied(page, mapping);
-               radix_tree_tag_set(&mapping->page_tree,
+               radix_tree_tag_set(&mapping->i_pages,
                                page_index(page), PAGECACHE_TAG_DIRTY);
        }
-       spin_unlock_irqrestore(&mapping->tree_lock, flags);
+       xa_unlock_irqrestore(&mapping->i_pages, flags);
 }
+EXPORT_SYMBOL_GPL(__set_page_dirty);
 
 /*
  * Add a page to the dirty page list.
@@ -1095,7 +1095,7 @@ __getblk_slow(struct block_device *bdev, sector_t block,
  * inode list.
  *
  * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
- * mapping->tree_lock and mapping->host->i_lock.
+ * i_pages lock and mapping->host->i_lock.
  */
 void mark_buffer_dirty(struct buffer_head *bh)
 {
index 174f5709e5086257bdfd623de7fbb07011c30634..a699e320393f2afba3785d2469da36b2c32350fd 100644 (file)
@@ -6,7 +6,7 @@
 obj-$(CONFIG_CEPH_FS) += ceph.o
 
 ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
-       export.o caps.o snap.o xattr.o \
+       export.o caps.o snap.o xattr.o quota.o \
        mds_client.o mdsmap.o strings.o ceph_frag.o \
        debugfs.o
 
index b4336b42ce3bb1fabec247c01f51d86c2c28490f..5f7ad3d0df2ea69121acded120e26d05515fd79a 100644 (file)
@@ -15,6 +15,7 @@
 #include "mds_client.h"
 #include "cache.h"
 #include <linux/ceph/osd_client.h>
+#include <linux/ceph/striper.h>
 
 /*
  * Ceph address space ops.
@@ -438,7 +439,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
 {
        struct inode *inode = file_inode(file);
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-       struct ceph_file_info *ci = file->private_data;
+       struct ceph_file_info *fi = file->private_data;
        struct ceph_rw_context *rw_ctx;
        int rc = 0;
        int max = 0;
@@ -452,7 +453,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
        if (rc == 0)
                goto out;
 
-       rw_ctx = ceph_find_rw_context(ci);
+       rw_ctx = ceph_find_rw_context(fi);
        max = fsc->mount_options->rsize >> PAGE_SHIFT;
        dout("readpages %p file %p ctx %p nr_pages %d max %d\n",
             inode, file, rw_ctx, nr_pages, max);
@@ -800,7 +801,7 @@ static int ceph_writepages_start(struct address_space *mapping,
        struct ceph_osd_request *req = NULL;
        struct ceph_writeback_ctl ceph_wbc;
        bool should_loop, range_whole = false;
-       bool stop, done = false;
+       bool done = false;
 
        dout("writepages_start %p (mode=%s)\n", inode,
             wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
@@ -856,7 +857,7 @@ retry:
                 * in that range can be associated with newer snapc.
                 * They are not writeable until we write all dirty pages
                 * associated with 'snapc' get written */
-               if (index > 0 || wbc->sync_mode != WB_SYNC_NONE)
+               if (index > 0)
                        should_loop = true;
                dout(" non-head snapc, range whole\n");
        }
@@ -864,8 +865,7 @@ retry:
        ceph_put_snap_context(last_snapc);
        last_snapc = snapc;
 
-       stop = false;
-       while (!stop && index <= end) {
+       while (!done && index <= end) {
                int num_ops = 0, op_idx;
                unsigned i, pvec_pages, max_pages, locked_pages = 0;
                struct page **pages = NULL, **data_pages;
@@ -898,16 +898,30 @@ get_more_pages:
                                unlock_page(page);
                                continue;
                        }
-                       if (strip_unit_end && (page->index > strip_unit_end)) {
-                               dout("end of strip unit %p\n", page);
+                       /* only if matching snap context */
+                       pgsnapc = page_snap_context(page);
+                       if (pgsnapc != snapc) {
+                               dout("page snapc %p %lld != oldest %p %lld\n",
+                                    pgsnapc, pgsnapc->seq, snapc, snapc->seq);
+                               if (!should_loop &&
+                                   !ceph_wbc.head_snapc &&
+                                   wbc->sync_mode != WB_SYNC_NONE)
+                                       should_loop = true;
                                unlock_page(page);
-                               break;
+                               continue;
                        }
                        if (page_offset(page) >= ceph_wbc.i_size) {
                                dout("%p page eof %llu\n",
                                     page, ceph_wbc.i_size);
-                               /* not done if range_cyclic */
-                               stop = true;
+                               if (ceph_wbc.size_stable ||
+                                   page_offset(page) >= i_size_read(inode))
+                                       mapping->a_ops->invalidatepage(page,
+                                                               0, PAGE_SIZE);
+                               unlock_page(page);
+                               continue;
+                       }
+                       if (strip_unit_end && (page->index > strip_unit_end)) {
+                               dout("end of strip unit %p\n", page);
                                unlock_page(page);
                                break;
                        }
@@ -921,15 +935,6 @@ get_more_pages:
                                wait_on_page_writeback(page);
                        }
 
-                       /* only if matching snap context */
-                       pgsnapc = page_snap_context(page);
-                       if (pgsnapc != snapc) {
-                               dout("page snapc %p %lld != oldest %p %lld\n",
-                                    pgsnapc, pgsnapc->seq, snapc, snapc->seq);
-                               unlock_page(page);
-                               continue;
-                       }
-
                        if (!clear_page_dirty_for_io(page)) {
                                dout("%p !clear_page_dirty_for_io\n", page);
                                unlock_page(page);
@@ -945,19 +950,15 @@ get_more_pages:
                        if (locked_pages == 0) {
                                u64 objnum;
                                u64 objoff;
+                               u32 xlen;
 
                                /* prepare async write request */
                                offset = (u64)page_offset(page);
-                               len = wsize;
-
-                               rc = ceph_calc_file_object_mapping(&ci->i_layout,
-                                                               offset, len,
-                                                               &objnum, &objoff,
-                                                               &len);
-                               if (rc < 0) {
-                                       unlock_page(page);
-                                       break;
-                               }
+                               ceph_calc_file_object_mapping(&ci->i_layout,
+                                                             offset, wsize,
+                                                             &objnum, &objoff,
+                                                             &xlen);
+                               len = xlen;
 
                                num_ops = 1;
                                strip_unit_end = page->index +
@@ -1146,7 +1147,7 @@ new_request:
                 * we tagged for writeback prior to entering this loop.
                 */
                if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE)
-                       done = stop = true;
+                       done = true;
 
 release_pvec_pages:
                dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr,
index 33a211b364ed9e230d56a1fd30a5deb311516118..bb524c880b1eadf2915a0a551eb01730871dee9a 100644 (file)
@@ -51,7 +51,7 @@ static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
        .type           = FSCACHE_COOKIE_TYPE_INDEX,
 };
 
-int ceph_fscache_register(void)
+int __init ceph_fscache_register(void)
 {
        return fscache_register_netfs(&ceph_cache_netfs);
 }
@@ -135,7 +135,7 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux(
        if (memcmp(data, &aux, sizeof(aux)) != 0)
                return FSCACHE_CHECKAUX_OBSOLETE;
 
-       dout("ceph inode 0x%p cached okay", ci);
+       dout("ceph inode 0x%p cached okay\n", ci);
        return FSCACHE_CHECKAUX_OKAY;
 }
 
index 0e5bd3e3344e7983e6bdf38dba1adfba17500eba..23dbfae1615685dde002b8fa4e77ed54f6a01c0e 100644 (file)
@@ -184,36 +184,54 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
                                         mdsc->caps_avail_count);
        spin_unlock(&mdsc->caps_list_lock);
 
-       for (i = have; i < need; i++) {
-retry:
+       for (i = have; i < need; ) {
                cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
-               if (!cap) {
-                       if (!trimmed) {
-                               for (j = 0; j < mdsc->max_sessions; j++) {
-                                       s = __ceph_lookup_mds_session(mdsc, j);
-                                       if (!s)
-                                               continue;
-                                       mutex_unlock(&mdsc->mutex);
+               if (cap) {
+                       list_add(&cap->caps_item, &newcaps);
+                       alloc++;
+                       i++;
+                       continue;
+               }
 
-                                       mutex_lock(&s->s_mutex);
-                                       max_caps = s->s_nr_caps - (need - i);
-                                       ceph_trim_caps(mdsc, s, max_caps);
-                                       mutex_unlock(&s->s_mutex);
+               if (!trimmed) {
+                       for (j = 0; j < mdsc->max_sessions; j++) {
+                               s = __ceph_lookup_mds_session(mdsc, j);
+                               if (!s)
+                                       continue;
+                               mutex_unlock(&mdsc->mutex);
 
-                                       ceph_put_mds_session(s);
-                                       mutex_lock(&mdsc->mutex);
-                               }
-                               trimmed = true;
-                               goto retry;
-                       } else {
-                               pr_warn("reserve caps ctx=%p ENOMEM "
-                                       "need=%d got=%d\n",
-                                       ctx, need, have + alloc);
-                               goto out_nomem;
+                               mutex_lock(&s->s_mutex);
+                               max_caps = s->s_nr_caps - (need - i);
+                               ceph_trim_caps(mdsc, s, max_caps);
+                               mutex_unlock(&s->s_mutex);
+
+                               ceph_put_mds_session(s);
+                               mutex_lock(&mdsc->mutex);
                        }
+                       trimmed = true;
+
+                       spin_lock(&mdsc->caps_list_lock);
+                       if (mdsc->caps_avail_count) {
+                               int more_have;
+                               if (mdsc->caps_avail_count >= need - i)
+                                       more_have = need - i;
+                               else
+                                       more_have = mdsc->caps_avail_count;
+
+                               i += more_have;
+                               have += more_have;
+                               mdsc->caps_avail_count -= more_have;
+                               mdsc->caps_reserve_count += more_have;
+
+                       }
+                       spin_unlock(&mdsc->caps_list_lock);
+
+                       continue;
                }
-               list_add(&cap->caps_item, &newcaps);
-               alloc++;
+
+               pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n",
+                       ctx, need, have + alloc);
+               goto out_nomem;
        }
        BUG_ON(have + alloc != need);
 
@@ -234,16 +252,28 @@ retry:
        return 0;
 
 out_nomem:
+
+       spin_lock(&mdsc->caps_list_lock);
+       mdsc->caps_avail_count += have;
+       mdsc->caps_reserve_count -= have;
+
        while (!list_empty(&newcaps)) {
                cap = list_first_entry(&newcaps,
                                struct ceph_cap, caps_item);
                list_del(&cap->caps_item);
-               kmem_cache_free(ceph_cap_cachep, cap);
+
+               /* Keep some preallocated caps around (ceph_min_count), to
+                * avoid lots of free/alloc churn. */
+               if (mdsc->caps_avail_count >=
+                   mdsc->caps_reserve_count + mdsc->caps_min_count) {
+                       kmem_cache_free(ceph_cap_cachep, cap);
+               } else {
+                       mdsc->caps_avail_count++;
+                       mdsc->caps_total_count++;
+                       list_add(&cap->caps_item, &mdsc->caps_list);
+               }
        }
 
-       spin_lock(&mdsc->caps_list_lock);
-       mdsc->caps_avail_count += have;
-       mdsc->caps_reserve_count -= have;
        BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
                                         mdsc->caps_reserve_count +
                                         mdsc->caps_avail_count);
@@ -254,12 +284,26 @@ out_nomem:
 int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
                        struct ceph_cap_reservation *ctx)
 {
+       int i;
+       struct ceph_cap *cap;
+
        dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count);
        if (ctx->count) {
                spin_lock(&mdsc->caps_list_lock);
                BUG_ON(mdsc->caps_reserve_count < ctx->count);
                mdsc->caps_reserve_count -= ctx->count;
-               mdsc->caps_avail_count += ctx->count;
+               if (mdsc->caps_avail_count >=
+                   mdsc->caps_reserve_count + mdsc->caps_min_count) {
+                       mdsc->caps_total_count -= ctx->count;
+                       for (i = 0; i < ctx->count; i++) {
+                               cap = list_first_entry(&mdsc->caps_list,
+                                       struct ceph_cap, caps_item);
+                               list_del(&cap->caps_item);
+                               kmem_cache_free(ceph_cap_cachep, cap);
+                       }
+               } else {
+                       mdsc->caps_avail_count += ctx->count;
+               }
                ctx->count = 0;
                dout("unreserve caps %d = %d used + %d resv + %d avail\n",
                     mdsc->caps_total_count, mdsc->caps_use_count,
@@ -285,7 +329,23 @@ struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
                        mdsc->caps_use_count++;
                        mdsc->caps_total_count++;
                        spin_unlock(&mdsc->caps_list_lock);
+               } else {
+                       spin_lock(&mdsc->caps_list_lock);
+                       if (mdsc->caps_avail_count) {
+                               BUG_ON(list_empty(&mdsc->caps_list));
+
+                               mdsc->caps_avail_count--;
+                               mdsc->caps_use_count++;
+                               cap = list_first_entry(&mdsc->caps_list,
+                                               struct ceph_cap, caps_item);
+                               list_del(&cap->caps_item);
+
+                               BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+                                      mdsc->caps_reserve_count + mdsc->caps_avail_count);
+                       }
+                       spin_unlock(&mdsc->caps_list_lock);
                }
+
                return cap;
        }
 
@@ -341,6 +401,8 @@ void ceph_reservation_status(struct ceph_fs_client *fsc,
 {
        struct ceph_mds_client *mdsc = fsc->mdsc;
 
+       spin_lock(&mdsc->caps_list_lock);
+
        if (total)
                *total = mdsc->caps_total_count;
        if (avail)
@@ -351,6 +413,8 @@ void ceph_reservation_status(struct ceph_fs_client *fsc,
                *reserved = mdsc->caps_reserve_count;
        if (min)
                *min = mdsc->caps_min_count;
+
+       spin_unlock(&mdsc->caps_list_lock);
 }
 
 /*
@@ -639,9 +703,11 @@ void ceph_add_cap(struct inode *inode,
                        }
 
                        spin_lock(&realm->inodes_with_caps_lock);
-                       ci->i_snap_realm = realm;
                        list_add(&ci->i_snap_realm_item,
                                 &realm->inodes_with_caps);
+                       ci->i_snap_realm = realm;
+                       if (realm->ino == ci->i_vino.ino)
+                               realm->inode = inode;
                        spin_unlock(&realm->inodes_with_caps_lock);
 
                        if (oldrealm)
index 644def8137547e103a67e653b9fe3832e0737eb8..abdf98deeec40244693cd731a2d6434d4ebffa09 100644 (file)
@@ -260,7 +260,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
                goto out;
 
        fsc->debugfs_mdsmap = debugfs_create_file("mdsmap",
-                                       0600,
+                                       0400,
                                        fsc->client->debugfs_dir,
                                        fsc,
                                        &mdsmap_show_fops);
@@ -268,7 +268,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
                goto out;
 
        fsc->debugfs_mds_sessions = debugfs_create_file("mds_sessions",
-                                       0600,
+                                       0400,
                                        fsc->client->debugfs_dir,
                                        fsc,
                                        &mds_sessions_show_fops);
@@ -276,7 +276,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
                goto out;
 
        fsc->debugfs_mdsc = debugfs_create_file("mdsc",
-                                               0600,
+                                               0400,
                                                fsc->client->debugfs_dir,
                                                fsc,
                                                &mdsc_show_fops);
@@ -292,7 +292,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
                goto out;
 
        fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru",
-                                       0600,
+                                       0400,
                                        fsc->client->debugfs_dir,
                                        fsc,
                                        &dentry_lru_show_fops);
index 2bdd561c4c68d38ef92b78eb4829afb88186b188..1a78dd6f8bf27655b03161de2617f7fc57f8f1b7 100644 (file)
@@ -101,18 +101,18 @@ static int fpos_cmp(loff_t l, loff_t r)
  * regardless of what dir changes take place on the
  * server.
  */
-static int note_last_dentry(struct ceph_file_info *fi, const char *name,
+static int note_last_dentry(struct ceph_dir_file_info *dfi, const char *name,
                            int len, unsigned next_offset)
 {
        char *buf = kmalloc(len+1, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;
-       kfree(fi->last_name);
-       fi->last_name = buf;
-       memcpy(fi->last_name, name, len);
-       fi->last_name[len] = 0;
-       fi->next_offset = next_offset;
-       dout("note_last_dentry '%s'\n", fi->last_name);
+       kfree(dfi->last_name);
+       dfi->last_name = buf;
+       memcpy(dfi->last_name, name, len);
+       dfi->last_name[len] = 0;
+       dfi->next_offset = next_offset;
+       dout("note_last_dentry '%s'\n", dfi->last_name);
        return 0;
 }
 
@@ -174,7 +174,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx,
 static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
                            int shared_gen)
 {
-       struct ceph_file_info *fi = file->private_data;
+       struct ceph_dir_file_info *dfi = file->private_data;
        struct dentry *parent = file->f_path.dentry;
        struct inode *dir = d_inode(parent);
        struct dentry *dentry, *last = NULL;
@@ -221,7 +221,7 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
                bool emit_dentry = false;
                dentry = __dcache_find_get_entry(parent, idx++, &cache_ctl);
                if (!dentry) {
-                       fi->flags |= CEPH_F_ATEND;
+                       dfi->file_info.flags |= CEPH_F_ATEND;
                        err = 0;
                        break;
                }
@@ -272,33 +272,33 @@ out:
        if (last) {
                int ret;
                di = ceph_dentry(last);
-               ret = note_last_dentry(fi, last->d_name.name, last->d_name.len,
+               ret = note_last_dentry(dfi, last->d_name.name, last->d_name.len,
                                       fpos_off(di->offset) + 1);
                if (ret < 0)
                        err = ret;
                dput(last);
                /* last_name no longer match cache index */
-               if (fi->readdir_cache_idx >= 0) {
-                       fi->readdir_cache_idx = -1;
-                       fi->dir_release_count = 0;
+               if (dfi->readdir_cache_idx >= 0) {
+                       dfi->readdir_cache_idx = -1;
+                       dfi->dir_release_count = 0;
                }
        }
        return err;
 }
 
-static bool need_send_readdir(struct ceph_file_info *fi, loff_t pos)
+static bool need_send_readdir(struct ceph_dir_file_info *dfi, loff_t pos)
 {
-       if (!fi->last_readdir)
+       if (!dfi->last_readdir)
                return true;
        if (is_hash_order(pos))
-               return !ceph_frag_contains_value(fi->frag, fpos_hash(pos));
+               return !ceph_frag_contains_value(dfi->frag, fpos_hash(pos));
        else
-               return fi->frag != fpos_frag(pos);
+               return dfi->frag != fpos_frag(pos);
 }
 
 static int ceph_readdir(struct file *file, struct dir_context *ctx)
 {
-       struct ceph_file_info *fi = file->private_data;
+       struct ceph_dir_file_info *dfi = file->private_data;
        struct inode *inode = file_inode(file);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
@@ -309,7 +309,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
        struct ceph_mds_reply_info_parsed *rinfo;
 
        dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
-       if (fi->flags & CEPH_F_ATEND)
+       if (dfi->file_info.flags & CEPH_F_ATEND)
                return 0;
 
        /* always start with . and .. */
@@ -350,15 +350,15 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
        /* proceed with a normal readdir */
 more:
        /* do we have the correct frag content buffered? */
-       if (need_send_readdir(fi, ctx->pos)) {
+       if (need_send_readdir(dfi, ctx->pos)) {
                struct ceph_mds_request *req;
                int op = ceph_snap(inode) == CEPH_SNAPDIR ?
                        CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
 
                /* discard old result, if any */
-               if (fi->last_readdir) {
-                       ceph_mdsc_put_request(fi->last_readdir);
-                       fi->last_readdir = NULL;
+               if (dfi->last_readdir) {
+                       ceph_mdsc_put_request(dfi->last_readdir);
+                       dfi->last_readdir = NULL;
                }
 
                if (is_hash_order(ctx->pos)) {
@@ -372,7 +372,7 @@ more:
                }
 
                dout("readdir fetching %llx.%llx frag %x offset '%s'\n",
-                    ceph_vinop(inode), frag, fi->last_name);
+                    ceph_vinop(inode), frag, dfi->last_name);
                req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
                if (IS_ERR(req))
                        return PTR_ERR(req);
@@ -388,8 +388,8 @@ more:
                        __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
                        req->r_inode_drop = CEPH_CAP_FILE_EXCL;
                }
-               if (fi->last_name) {
-                       req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL);
+               if (dfi->last_name) {
+                       req->r_path2 = kstrdup(dfi->last_name, GFP_KERNEL);
                        if (!req->r_path2) {
                                ceph_mdsc_put_request(req);
                                return -ENOMEM;
@@ -399,10 +399,10 @@ more:
                                cpu_to_le32(fpos_hash(ctx->pos));
                }
 
-               req->r_dir_release_cnt = fi->dir_release_count;
-               req->r_dir_ordered_cnt = fi->dir_ordered_count;
-               req->r_readdir_cache_idx = fi->readdir_cache_idx;
-               req->r_readdir_offset = fi->next_offset;
+               req->r_dir_release_cnt = dfi->dir_release_count;
+               req->r_dir_ordered_cnt = dfi->dir_ordered_count;
+               req->r_readdir_cache_idx = dfi->readdir_cache_idx;
+               req->r_readdir_offset = dfi->next_offset;
                req->r_args.readdir.frag = cpu_to_le32(frag);
                req->r_args.readdir.flags =
                                cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
@@ -426,35 +426,35 @@ more:
                if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
                        frag = le32_to_cpu(rinfo->dir_dir->frag);
                        if (!rinfo->hash_order) {
-                               fi->next_offset = req->r_readdir_offset;
+                               dfi->next_offset = req->r_readdir_offset;
                                /* adjust ctx->pos to beginning of frag */
                                ctx->pos = ceph_make_fpos(frag,
-                                                         fi->next_offset,
+                                                         dfi->next_offset,
                                                          false);
                        }
                }
 
-               fi->frag = frag;
-               fi->last_readdir = req;
+               dfi->frag = frag;
+               dfi->last_readdir = req;
 
                if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) {
-                       fi->readdir_cache_idx = req->r_readdir_cache_idx;
-                       if (fi->readdir_cache_idx < 0) {
+                       dfi->readdir_cache_idx = req->r_readdir_cache_idx;
+                       if (dfi->readdir_cache_idx < 0) {
                                /* preclude from marking dir ordered */
-                               fi->dir_ordered_count = 0;
+                               dfi->dir_ordered_count = 0;
                        } else if (ceph_frag_is_leftmost(frag) &&
-                                  fi->next_offset == 2) {
+                                  dfi->next_offset == 2) {
                                /* note dir version at start of readdir so
                                 * we can tell if any dentries get dropped */
-                               fi->dir_release_count = req->r_dir_release_cnt;
-                               fi->dir_ordered_count = req->r_dir_ordered_cnt;
+                               dfi->dir_release_count = req->r_dir_release_cnt;
+                               dfi->dir_ordered_count = req->r_dir_ordered_cnt;
                        }
                } else {
-                       dout("readdir !did_prepopulate");
+                       dout("readdir !did_prepopulate\n");
                        /* disable readdir cache */
-                       fi->readdir_cache_idx = -1;
+                       dfi->readdir_cache_idx = -1;
                        /* preclude from marking dir complete */
-                       fi->dir_release_count = 0;
+                       dfi->dir_release_count = 0;
                }
 
                /* note next offset and last dentry name */
@@ -463,19 +463,19 @@ more:
                                        rinfo->dir_entries + (rinfo->dir_nr-1);
                        unsigned next_offset = req->r_reply_info.dir_end ?
                                        2 : (fpos_off(rde->offset) + 1);
-                       err = note_last_dentry(fi, rde->name, rde->name_len,
+                       err = note_last_dentry(dfi, rde->name, rde->name_len,
                                               next_offset);
                        if (err)
                                return err;
                } else if (req->r_reply_info.dir_end) {
-                       fi->next_offset = 2;
+                       dfi->next_offset = 2;
                        /* keep last name */
                }
        }
 
-       rinfo = &fi->last_readdir->r_reply_info;
+       rinfo = &dfi->last_readdir->r_reply_info;
        dout("readdir frag %x num %d pos %llx chunk first %llx\n",
-            fi->frag, rinfo->dir_nr, ctx->pos,
+            dfi->frag, rinfo->dir_nr, ctx->pos,
             rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL);
 
        i = 0;
@@ -519,52 +519,55 @@ more:
                ctx->pos++;
        }
 
-       ceph_mdsc_put_request(fi->last_readdir);
-       fi->last_readdir = NULL;
+       ceph_mdsc_put_request(dfi->last_readdir);
+       dfi->last_readdir = NULL;
 
-       if (fi->next_offset > 2) {
-               frag = fi->frag;
+       if (dfi->next_offset > 2) {
+               frag = dfi->frag;
                goto more;
        }
 
        /* more frags? */
-       if (!ceph_frag_is_rightmost(fi->frag)) {
-               frag = ceph_frag_next(fi->frag);
+       if (!ceph_frag_is_rightmost(dfi->frag)) {
+               frag = ceph_frag_next(dfi->frag);
                if (is_hash_order(ctx->pos)) {
                        loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag),
-                                                       fi->next_offset, true);
+                                                       dfi->next_offset, true);
                        if (new_pos > ctx->pos)
                                ctx->pos = new_pos;
                        /* keep last_name */
                } else {
-                       ctx->pos = ceph_make_fpos(frag, fi->next_offset, false);
-                       kfree(fi->last_name);
-                       fi->last_name = NULL;
+                       ctx->pos = ceph_make_fpos(frag, dfi->next_offset,
+                                                       false);
+                       kfree(dfi->last_name);
+                       dfi->last_name = NULL;
                }
                dout("readdir next frag is %x\n", frag);
                goto more;
        }
-       fi->flags |= CEPH_F_ATEND;
+       dfi->file_info.flags |= CEPH_F_ATEND;
 
        /*
         * if dir_release_count still matches the dir, no dentries
         * were released during the whole readdir, and we should have
         * the complete dir contents in our cache.
         */
-       if (atomic64_read(&ci->i_release_count) == fi->dir_release_count) {
+       if (atomic64_read(&ci->i_release_count) ==
+                                       dfi->dir_release_count) {
                spin_lock(&ci->i_ceph_lock);
-               if (fi->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) {
+               if (dfi->dir_ordered_count ==
+                               atomic64_read(&ci->i_ordered_count)) {
                        dout(" marking %p complete and ordered\n", inode);
                        /* use i_size to track number of entries in
                         * readdir cache */
-                       BUG_ON(fi->readdir_cache_idx < 0);
-                       i_size_write(inode, fi->readdir_cache_idx *
+                       BUG_ON(dfi->readdir_cache_idx < 0);
+                       i_size_write(inode, dfi->readdir_cache_idx *
                                     sizeof(struct dentry*));
                } else {
                        dout(" marking %p complete\n", inode);
                }
-               __ceph_dir_set_complete(ci, fi->dir_release_count,
-                                       fi->dir_ordered_count);
+               __ceph_dir_set_complete(ci, dfi->dir_release_count,
+                                       dfi->dir_ordered_count);
                spin_unlock(&ci->i_ceph_lock);
        }
 
@@ -572,25 +575,25 @@ more:
        return 0;
 }
 
-static void reset_readdir(struct ceph_file_info *fi)
+static void reset_readdir(struct ceph_dir_file_info *dfi)
 {
-       if (fi->last_readdir) {
-               ceph_mdsc_put_request(fi->last_readdir);
-               fi->last_readdir = NULL;
+       if (dfi->last_readdir) {
+               ceph_mdsc_put_request(dfi->last_readdir);
+               dfi->last_readdir = NULL;
        }
-       kfree(fi->last_name);
-       fi->last_name = NULL;
-       fi->dir_release_count = 0;
-       fi->readdir_cache_idx = -1;
-       fi->next_offset = 2;  /* compensate for . and .. */
-       fi->flags &= ~CEPH_F_ATEND;
+       kfree(dfi->last_name);
+       dfi->last_name = NULL;
+       dfi->dir_release_count = 0;
+       dfi->readdir_cache_idx = -1;
+       dfi->next_offset = 2;  /* compensate for . and .. */
+       dfi->file_info.flags &= ~CEPH_F_ATEND;
 }
 
 /*
  * discard buffered readdir content on seekdir(0), or seek to new frag,
  * or seek prior to current chunk
  */
-static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
+static bool need_reset_readdir(struct ceph_dir_file_info *dfi, loff_t new_pos)
 {
        struct ceph_mds_reply_info_parsed *rinfo;
        loff_t chunk_offset;
@@ -599,10 +602,10 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
        if (is_hash_order(new_pos)) {
                /* no need to reset last_name for a forward seek when
         * dentries are sorted in hash order */
-       } else if (fi->frag != fpos_frag(new_pos)) {
+       } else if (dfi->frag != fpos_frag(new_pos)) {
                return true;
        }
-       rinfo = fi->last_readdir ? &fi->last_readdir->r_reply_info : NULL;
+       rinfo = dfi->last_readdir ? &dfi->last_readdir->r_reply_info : NULL;
        if (!rinfo || !rinfo->dir_nr)
                return true;
        chunk_offset = rinfo->dir_entries[0].offset;
@@ -612,7 +615,7 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
 
 static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
 {
-       struct ceph_file_info *fi = file->private_data;
+       struct ceph_dir_file_info *dfi = file->private_data;
        struct inode *inode = file->f_mapping->host;
        loff_t retval;
 
@@ -630,20 +633,20 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
        }
 
        if (offset >= 0) {
-               if (need_reset_readdir(fi, offset)) {
+               if (need_reset_readdir(dfi, offset)) {
                        dout("dir_llseek dropping %p content\n", file);
-                       reset_readdir(fi);
+                       reset_readdir(dfi);
                } else if (is_hash_order(offset) && offset > file->f_pos) {
                        /* for hash offset, we don't know if a forward seek
                         * is within same frag */
-                       fi->dir_release_count = 0;
-                       fi->readdir_cache_idx = -1;
+                       dfi->dir_release_count = 0;
+                       dfi->readdir_cache_idx = -1;
                }
 
                if (offset != file->f_pos) {
                        file->f_pos = offset;
                        file->f_version = 0;
-                       fi->flags &= ~CEPH_F_ATEND;
+                       dfi->file_info.flags &= ~CEPH_F_ATEND;
                }
                retval = offset;
        }
@@ -824,6 +827,9 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
        if (ceph_snap(dir) != CEPH_NOSNAP)
                return -EROFS;
 
+       if (ceph_quota_is_max_files_exceeded(dir))
+               return -EDQUOT;
+
        err = ceph_pre_init_acls(dir, &mode, &acls);
        if (err < 0)
                return err;
@@ -877,6 +883,9 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
        if (ceph_snap(dir) != CEPH_NOSNAP)
                return -EROFS;
 
+       if (ceph_quota_is_max_files_exceeded(dir))
+               return -EDQUOT;
+
        dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest);
        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
        if (IS_ERR(req)) {
@@ -926,6 +935,12 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
                goto out;
        }
 
+       if (op == CEPH_MDS_OP_MKDIR &&
+           ceph_quota_is_max_files_exceeded(dir)) {
+               err = -EDQUOT;
+               goto out;
+       }
+
        mode |= S_IFDIR;
        err = ceph_pre_init_acls(dir, &mode, &acls);
        if (err < 0)
@@ -1065,6 +1080,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
                else
                        return -EROFS;
        }
+       /* don't allow cross-quota renames */
+       if ((old_dir != new_dir) &&
+           (!ceph_quota_is_same_realm(old_dir, new_dir)))
+               return -EXDEV;
+
        dout("rename dir %p dentry %p to dir %p dentry %p\n",
             old_dir, old_dentry, new_dir, new_dentry);
        req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
@@ -1351,7 +1371,7 @@ static void ceph_d_prune(struct dentry *dentry)
 static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
                             loff_t *ppos)
 {
-       struct ceph_file_info *cf = file->private_data;
+       struct ceph_dir_file_info *dfi = file->private_data;
        struct inode *inode = file_inode(file);
        struct ceph_inode_info *ci = ceph_inode(inode);
        int left;
@@ -1360,12 +1380,12 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
        if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
                return -EISDIR;
 
-       if (!cf->dir_info) {
-               cf->dir_info = kmalloc(bufsize, GFP_KERNEL);
-               if (!cf->dir_info)
+       if (!dfi->dir_info) {
+               dfi->dir_info = kmalloc(bufsize, GFP_KERNEL);
+               if (!dfi->dir_info)
                        return -ENOMEM;
-               cf->dir_info_len =
-                       snprintf(cf->dir_info, bufsize,
+               dfi->dir_info_len =
+                       snprintf(dfi->dir_info, bufsize,
                                "entries:   %20lld\n"
                                " files:    %20lld\n"
                                " subdirs:  %20lld\n"
@@ -1385,10 +1405,10 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
                                (long)ci->i_rctime.tv_nsec);
        }
 
-       if (*ppos >= cf->dir_info_len)
+       if (*ppos >= dfi->dir_info_len)
                return 0;
-       size = min_t(unsigned, size, cf->dir_info_len-*ppos);
-       left = copy_to_user(buf, cf->dir_info + *ppos, size);
+       size = min_t(unsigned, size, dfi->dir_info_len-*ppos);
+       left = copy_to_user(buf, dfi->dir_info + *ppos, size);
        if (left == size)
                return -EFAULT;
        *ppos += (size - left);
index b67eec3532a125c0c8ca0c97b7b0f22c650f7966..f85040d73e3dcaa0214196ca0c242eadd0db475d 100644 (file)
@@ -30,6 +30,8 @@ static __le32 ceph_flags_sys2wire(u32 flags)
                break;
        }
 
+       flags &= ~O_ACCMODE;
+
 #define ceph_sys2wire(a) if (flags & a) { wire_flags |= CEPH_##a; flags &= ~a; }
 
        ceph_sys2wire(O_CREAT);
@@ -41,7 +43,7 @@ static __le32 ceph_flags_sys2wire(u32 flags)
 #undef ceph_sys2wire
 
        if (flags)
-               dout("unused open flags: %x", flags);
+               dout("unused open flags: %x\n", flags);
 
        return cpu_to_le32(wire_flags);
 }
@@ -159,13 +161,50 @@ out:
        return req;
 }
 
+static int ceph_init_file_info(struct inode *inode, struct file *file,
+                                       int fmode, bool isdir)
+{
+       struct ceph_file_info *fi;
+
+       dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
+                       inode->i_mode, isdir ? "dir" : "regular");
+       BUG_ON(inode->i_fop->release != ceph_release);
+
+       if (isdir) {
+               struct ceph_dir_file_info *dfi =
+                       kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
+               if (!dfi) {
+                       ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+                       return -ENOMEM;
+               }
+
+               file->private_data = dfi;
+               fi = &dfi->file_info;
+               dfi->next_offset = 2;
+               dfi->readdir_cache_idx = -1;
+       } else {
+               fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
+               if (!fi) {
+                       ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+                       return -ENOMEM;
+               }
+
+               file->private_data = fi;
+       }
+
+       fi->fmode = fmode;
+       spin_lock_init(&fi->rw_contexts_lock);
+       INIT_LIST_HEAD(&fi->rw_contexts);
+
+       return 0;
+}
+
 /*
  * initialize private struct file data.
  * if we fail, clean up by dropping fmode reference on the ceph_inode
  */
 static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 {
-       struct ceph_file_info *cf;
        int ret = 0;
 
        switch (inode->i_mode & S_IFMT) {
@@ -173,22 +212,10 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
                ceph_fscache_register_inode_cookie(inode);
                ceph_fscache_file_set_cookie(inode, file);
        case S_IFDIR:
-               dout("init_file %p %p 0%o (regular)\n", inode, file,
-                    inode->i_mode);
-               cf = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
-               if (!cf) {
-                       ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
-                       return -ENOMEM;
-               }
-               cf->fmode = fmode;
-
-               spin_lock_init(&cf->rw_contexts_lock);
-               INIT_LIST_HEAD(&cf->rw_contexts);
-
-               cf->next_offset = 2;
-               cf->readdir_cache_idx = -1;
-               file->private_data = cf;
-               BUG_ON(inode->i_fop->release != ceph_release);
+               ret = ceph_init_file_info(inode, file, fmode,
+                                               S_ISDIR(inode->i_mode));
+               if (ret)
+                       return ret;
                break;
 
        case S_IFLNK:
@@ -278,11 +305,11 @@ int ceph_open(struct inode *inode, struct file *file)
        struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
        struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_mds_request *req;
-       struct ceph_file_info *cf = file->private_data;
+       struct ceph_file_info *fi = file->private_data;
        int err;
        int flags, fmode, wanted;
 
-       if (cf) {
+       if (fi) {
                dout("open file %p is already opened\n", file);
                return 0;
        }
@@ -375,7 +402,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
        struct ceph_mds_request *req;
        struct dentry *dn;
        struct ceph_acls_info acls = {};
-       int mask;
+       int mask;
        int err;
 
        dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n",
@@ -386,6 +413,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
                return -ENAMETOOLONG;
 
        if (flags & O_CREAT) {
+               if (ceph_quota_is_max_files_exceeded(dir))
+                       return -EDQUOT;
                err = ceph_pre_init_acls(dir, &mode, &acls);
                if (err < 0)
                        return err;
@@ -460,16 +489,27 @@ out_acl:
 int ceph_release(struct inode *inode, struct file *file)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_file_info *cf = file->private_data;
 
-       dout("release inode %p file %p\n", inode, file);
-       ceph_put_fmode(ci, cf->fmode);
-       if (cf->last_readdir)
-               ceph_mdsc_put_request(cf->last_readdir);
-       kfree(cf->last_name);
-       kfree(cf->dir_info);
-       WARN_ON(!list_empty(&cf->rw_contexts));
-       kmem_cache_free(ceph_file_cachep, cf);
+       if (S_ISDIR(inode->i_mode)) {
+               struct ceph_dir_file_info *dfi = file->private_data;
+               dout("release inode %p dir file %p\n", inode, file);
+               WARN_ON(!list_empty(&dfi->file_info.rw_contexts));
+
+               ceph_put_fmode(ci, dfi->file_info.fmode);
+
+               if (dfi->last_readdir)
+                       ceph_mdsc_put_request(dfi->last_readdir);
+               kfree(dfi->last_name);
+               kfree(dfi->dir_info);
+               kmem_cache_free(ceph_dir_file_cachep, dfi);
+       } else {
+               struct ceph_file_info *fi = file->private_data;
+               dout("release inode %p regular file %p\n", inode, file);
+               WARN_ON(!list_empty(&fi->rw_contexts));
+
+               ceph_put_fmode(ci, fi->fmode);
+               kmem_cache_free(ceph_file_cachep, fi);
+       }
 
        /* wake up anyone waiting for caps on this inode */
        wake_up_all(&ci->i_cap_wq);
@@ -1338,6 +1378,11 @@ retry_snap:
 
        pos = iocb->ki_pos;
        count = iov_iter_count(from);
+       if (ceph_quota_is_max_bytes_exceeded(inode, pos + count)) {
+               err = -EDQUOT;
+               goto out;
+       }
+
        err = file_remove_privs(file);
        if (err)
                goto out;
@@ -1419,6 +1464,7 @@ retry_snap:
 
        if (written >= 0) {
                int dirty;
+
                spin_lock(&ci->i_ceph_lock);
                ci->i_inline_version = CEPH_INLINE_NONE;
                dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
@@ -1426,6 +1472,8 @@ retry_snap:
                spin_unlock(&ci->i_ceph_lock);
                if (dirty)
                        __mark_inode_dirty(inode, dirty);
+               if (ceph_quota_is_max_bytes_approaching(inode, iocb->ki_pos))
+                       ceph_check_caps(ci, CHECK_CAPS_NODELAY, NULL);
        }
 
        dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
@@ -1668,6 +1716,12 @@ static long ceph_fallocate(struct file *file, int mode,
                goto unlock;
        }
 
+       if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE)) &&
+           ceph_quota_is_max_bytes_exceeded(inode, offset + length)) {
+               ret = -EDQUOT;
+               goto unlock;
+       }
+
        if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) &&
            !(mode & FALLOC_FL_PUNCH_HOLE)) {
                ret = -ENOSPC;
@@ -1716,6 +1770,9 @@ static long ceph_fallocate(struct file *file, int mode,
                spin_unlock(&ci->i_ceph_lock);
                if (dirty)
                        __mark_inode_dirty(inode, dirty);
+               if ((endoff > size) &&
+                   ceph_quota_is_max_bytes_approaching(inode, endoff))
+                       ceph_check_caps(ci, CHECK_CAPS_NODELAY, NULL);
        }
 
        ceph_put_cap_refs(ci, got);
index c6ec5aa461002796325e84796199dd0ed2dd0a67..8bf60250309e359ffd6a34f89c75991b3a1c3c48 100644 (file)
@@ -441,6 +441,9 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        atomic64_set(&ci->i_complete_seq[1], 0);
        ci->i_symlink = NULL;
 
+       ci->i_max_bytes = 0;
+       ci->i_max_files = 0;
+
        memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
        RCU_INIT_POINTER(ci->i_layout.pool_ns, NULL);
 
@@ -536,6 +539,9 @@ void ceph_destroy_inode(struct inode *inode)
 
        ceph_queue_caps_release(inode);
 
+       if (__ceph_has_any_quota(ci))
+               ceph_adjust_quota_realms_count(inode, false);
+
        /*
         * we may still have a snap_realm reference if there are stray
         * caps in i_snap_caps.
@@ -548,6 +554,9 @@ void ceph_destroy_inode(struct inode *inode)
                dout(" dropping residual ref to snap realm %p\n", realm);
                spin_lock(&realm->inodes_with_caps_lock);
                list_del_init(&ci->i_snap_realm_item);
+               ci->i_snap_realm = NULL;
+               if (realm->ino == ci->i_vino.ino)
+                       realm->inode = NULL;
                spin_unlock(&realm->inodes_with_caps_lock);
                ceph_put_snap_realm(mdsc, realm);
        }
@@ -790,6 +799,8 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
        inode->i_rdev = le32_to_cpu(info->rdev);
        inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
 
+       __ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files);
+
        if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
            (issued & CEPH_CAP_AUTH_EXCL) == 0) {
                inode->i_mode = le32_to_cpu(info->mode);
@@ -1867,20 +1878,9 @@ retry:
         * possibly truncate them.. so write AND block!
         */
        if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
-               struct ceph_cap_snap *capsnap;
-               to = ci->i_truncate_size;
-               list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
-                       // MDS should have revoked Frw caps
-                       WARN_ON_ONCE(capsnap->writing);
-                       if (capsnap->dirty_pages && capsnap->size > to)
-                               to = capsnap->size;
-               }
                spin_unlock(&ci->i_ceph_lock);
                dout("__do_pending_vmtruncate %p flushing snaps first\n",
                     inode);
-
-               truncate_pagecache(inode, to);
-
                filemap_write_and_wait_range(&inode->i_data, 0,
                                             inode->i_sb->s_maxbytes);
                goto retry;
@@ -2152,6 +2152,10 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
        if (err != 0)
                return err;
 
+       if ((attr->ia_valid & ATTR_SIZE) &&
+           ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size))
+               return -EDQUOT;
+
        err = __ceph_setattr(inode, attr);
 
        if (err >= 0 && (attr->ia_valid & ATTR_MODE))
index 851aa69ec8f0dae6ec4837b2dc3807c4420cc751..c90f03beb15d0b70ef0455015cf2343746227574 100644 (file)
@@ -5,7 +5,7 @@
 #include "super.h"
 #include "mds_client.h"
 #include "ioctl.h"
-
+#include <linux/ceph/striper.h>
 
 /*
  * ioctls
@@ -185,7 +185,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
                &ceph_sb_to_client(inode->i_sb)->client->osdc;
        struct ceph_object_locator oloc;
        CEPH_DEFINE_OID_ONSTACK(oid);
-       u64 len = 1, olen;
+       u32 xlen;
        u64 tmp;
        struct ceph_pg pgid;
        int r;
@@ -195,13 +195,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
                return -EFAULT;
 
        down_read(&osdc->lock);
-       r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
-                                         &dl.object_no, &dl.object_offset,
-                                         &olen);
-       if (r < 0) {
-               up_read(&osdc->lock);
-               return -EIO;
-       }
+       ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, 1,
+                                     &dl.object_no, &dl.object_offset, &xlen);
        dl.file_offset -= dl.object_offset;
        dl.object_size = ci->i_layout.object_size;
        dl.block_size = ci->i_layout.stripe_unit;
index 9e66f69ee8a5ecc9e8455465f232bf70529e59a8..9dae2ec7e1fa89705f649b1c3349520b8bb23543 100644 (file)
@@ -95,7 +95,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
        owner = secure_addr(fl->fl_owner);
 
        dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, "
-            "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type,
+            "start: %llu, length: %llu, wait: %d, type: %d\n", (int)lock_type,
             (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length,
             wait, fl->fl_type);
 
@@ -132,7 +132,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
        }
        ceph_mdsc_put_request(req);
        dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
-            "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type,
+            "length: %llu, wait: %d, type: %d, err code %d\n", (int)lock_type,
             (int)operation, (u64)fl->fl_pid, fl->fl_start,
             length, wait, fl->fl_type, err);
        return err;
@@ -226,7 +226,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
        if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
                return -ENOLCK;
 
-       dout("ceph_lock, fl_owner: %p", fl->fl_owner);
+       dout("ceph_lock, fl_owner: %p\n", fl->fl_owner);
 
        /* set wait bit as appropriate, then make the command as Ceph expects it */
        if (IS_GETLK(cmd))
@@ -264,7 +264,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
        err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
        if (!err) {
                if (op == CEPH_MDS_OP_SETFILELOCK) {
-                       dout("mds locked, locking locally");
+                       dout("mds locked, locking locally\n");
                        err = posix_lock_file(file, fl, NULL);
                        if (err) {
                                /* undo! This should only happen if
@@ -272,7 +272,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
                                 * deadlock. */
                                ceph_lock_message(CEPH_LOCK_FCNTL, op, inode,
                                                  CEPH_LOCK_UNLOCK, 0, fl);
-                               dout("got %d on posix_lock_file, undid lock",
+                               dout("got %d on posix_lock_file, undid lock\n",
                                     err);
                        }
                }
@@ -294,7 +294,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
        if (fl->fl_type & LOCK_MAND)
                return -EOPNOTSUPP;
 
-       dout("ceph_flock, fl_file: %p", fl->fl_file);
+       dout("ceph_flock, fl_file: %p\n", fl->fl_file);
 
        spin_lock(&ci->i_ceph_lock);
        if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
@@ -329,7 +329,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
                        ceph_lock_message(CEPH_LOCK_FLOCK,
                                          CEPH_MDS_OP_SETFILELOCK,
                                          inode, CEPH_LOCK_UNLOCK, 0, fl);
-                       dout("got %d on locks_lock_file_wait, undid lock", err);
+                       dout("got %d on locks_lock_file_wait, undid lock\n", err);
                }
        }
        return err;
@@ -356,7 +356,7 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
                        ++(*flock_count);
                spin_unlock(&ctx->flc_lock);
        }
-       dout("counted %d flock locks and %d fcntl locks",
+       dout("counted %d flock locks and %d fcntl locks\n",
             *flock_count, *fcntl_count);
 }
 
@@ -384,7 +384,7 @@ static int lock_to_ceph_filelock(struct file_lock *lock,
                cephlock->type = CEPH_LOCK_UNLOCK;
                break;
        default:
-               dout("Have unknown lock type %d", lock->fl_type);
+               dout("Have unknown lock type %d\n", lock->fl_type);
                err = -EINVAL;
        }
 
@@ -407,7 +407,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode,
        int seen_flock = 0;
        int l = 0;
 
-       dout("encoding %d flock and %d fcntl locks", num_flock_locks,
+       dout("encoding %d flock and %d fcntl locks\n", num_flock_locks,
             num_fcntl_locks);
 
        if (!ctx)
index 2e8f90f96540291ab412703133d2a4278edbac71..5ece2e6ad1548e0893ee9734132a64bb66ffc89e 100644 (file)
@@ -100,6 +100,26 @@ static int parse_reply_info_in(void **p, void *end,
        } else
                info->inline_version = CEPH_INLINE_NONE;
 
+       if (features & CEPH_FEATURE_MDS_QUOTA) {
+               u8 struct_v, struct_compat;
+               u32 struct_len;
+
+               /*
+                * both struct_v and struct_compat are expected to be >= 1
+                */
+               ceph_decode_8_safe(p, end, struct_v, bad);
+               ceph_decode_8_safe(p, end, struct_compat, bad);
+               if (!struct_v || !struct_compat)
+                       goto bad;
+               ceph_decode_32_safe(p, end, struct_len, bad);
+               ceph_decode_need(p, end, struct_len, bad);
+               ceph_decode_64_safe(p, end, info->max_bytes, bad);
+               ceph_decode_64_safe(p, end, info->max_files, bad);
+       } else {
+               info->max_bytes = 0;
+               info->max_files = 0;
+       }
+
        info->pool_ns_len = 0;
        info->pool_ns_data = NULL;
        if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) {
@@ -384,7 +404,7 @@ static struct ceph_mds_session *get_session(struct ceph_mds_session *s)
                     refcount_read(&s->s_ref)-1, refcount_read(&s->s_ref));
                return s;
        } else {
-               dout("mdsc get_session %p 0 -- FAIL", s);
+               dout("mdsc get_session %p 0 -- FAIL\n", s);
                return NULL;
        }
 }
@@ -419,9 +439,10 @@ struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc,
 
 static bool __have_session(struct ceph_mds_client *mdsc, int mds)
 {
-       if (mds >= mdsc->max_sessions)
+       if (mds >= mdsc->max_sessions || !mdsc->sessions[mds])
                return false;
-       return mdsc->sessions[mds];
+       else
+               return true;
 }
 
 static int __verify_registered_session(struct ceph_mds_client *mdsc,
@@ -448,6 +469,25 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
        s = kzalloc(sizeof(*s), GFP_NOFS);
        if (!s)
                return ERR_PTR(-ENOMEM);
+
+       if (mds >= mdsc->max_sessions) {
+               int newmax = 1 << get_count_order(mds + 1);
+               struct ceph_mds_session **sa;
+
+               dout("%s: realloc to %d\n", __func__, newmax);
+               sa = kcalloc(newmax, sizeof(void *), GFP_NOFS);
+               if (!sa)
+                       goto fail_realloc;
+               if (mdsc->sessions) {
+                       memcpy(sa, mdsc->sessions,
+                              mdsc->max_sessions * sizeof(void *));
+                       kfree(mdsc->sessions);
+               }
+               mdsc->sessions = sa;
+               mdsc->max_sessions = newmax;
+       }
+
+       dout("%s: mds%d\n", __func__, mds);
        s->s_mdsc = mdsc;
        s->s_mds = mds;
        s->s_state = CEPH_MDS_SESSION_NEW;
@@ -476,23 +516,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
        INIT_LIST_HEAD(&s->s_cap_releases);
        INIT_LIST_HEAD(&s->s_cap_flushing);
 
-       dout("register_session mds%d\n", mds);
-       if (mds >= mdsc->max_sessions) {
-               int newmax = 1 << get_count_order(mds+1);
-               struct ceph_mds_session **sa;
-
-               dout("register_session realloc to %d\n", newmax);
-               sa = kcalloc(newmax, sizeof(void *), GFP_NOFS);
-               if (!sa)
-                       goto fail_realloc;
-               if (mdsc->sessions) {
-                       memcpy(sa, mdsc->sessions,
-                              mdsc->max_sessions * sizeof(void *));
-                       kfree(mdsc->sessions);
-               }
-               mdsc->sessions = sa;
-               mdsc->max_sessions = newmax;
-       }
        mdsc->sessions[mds] = s;
        atomic_inc(&mdsc->num_sessions);
        refcount_inc(&s->s_ref);  /* one ref to sessions[], one to caller */
@@ -2531,10 +2554,10 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
         * Otherwise we just have to return an ESTALE
         */
        if (result == -ESTALE) {
-               dout("got ESTALE on request %llu", req->r_tid);
+               dout("got ESTALE on request %llu\n", req->r_tid);
                req->r_resend_mds = -1;
                if (req->r_direct_mode != USE_AUTH_MDS) {
-                       dout("not using auth, setting for that now");
+                       dout("not using auth, setting for that now\n");
                        req->r_direct_mode = USE_AUTH_MDS;
                        __do_request(mdsc, req);
                        mutex_unlock(&mdsc->mutex);
@@ -2542,13 +2565,13 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
                } else  {
                        int mds = __choose_mds(mdsc, req);
                        if (mds >= 0 && mds != req->r_session->s_mds) {
-                               dout("but auth changed, so resending");
+                               dout("but auth changed, so resending\n");
                                __do_request(mdsc, req);
                                mutex_unlock(&mdsc->mutex);
                                goto out;
                        }
                }
-               dout("have to return ESTALE on request %llu", req->r_tid);
+               dout("have to return ESTALE on request %llu\n", req->r_tid);
        }
 
 
@@ -3470,13 +3493,12 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
 }
 
 /*
- * drop all leases (and dentry refs) in preparation for umount
+ * lock/unlock each session, to wait for ongoing session activities to finish
  */
-static void drop_leases(struct ceph_mds_client *mdsc)
+static void lock_unlock_sessions(struct ceph_mds_client *mdsc)
 {
        int i;
 
-       dout("drop_leases\n");
        mutex_lock(&mdsc->mutex);
        for (i = 0; i < mdsc->max_sessions; i++) {
                struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
@@ -3572,7 +3594,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
        if (!mdsc)
                return -ENOMEM;
        mdsc->fsc = fsc;
-       fsc->mdsc = mdsc;
        mutex_init(&mdsc->mutex);
        mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
        if (!mdsc->mdsmap) {
@@ -3580,6 +3601,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
                return -ENOMEM;
        }
 
+       fsc->mdsc = mdsc;
        init_completion(&mdsc->safe_umount_waiters);
        init_waitqueue_head(&mdsc->session_close_wq);
        INIT_LIST_HEAD(&mdsc->waiting_for_map);
@@ -3587,6 +3609,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
        atomic_set(&mdsc->num_sessions, 0);
        mdsc->max_sessions = 0;
        mdsc->stopping = 0;
+       atomic64_set(&mdsc->quotarealms_count, 0);
        mdsc->last_snap_seq = 0;
        init_rwsem(&mdsc->snap_rwsem);
        mdsc->snap_realms = RB_ROOT;
@@ -3660,7 +3683,7 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
        dout("pre_umount\n");
        mdsc->stopping = 1;
 
-       drop_leases(mdsc);
+       lock_unlock_sessions(mdsc);
        ceph_flush_dirty_caps(mdsc);
        wait_requests(mdsc);
 
@@ -3858,6 +3881,9 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
        struct ceph_mds_client *mdsc = fsc->mdsc;
        dout("mdsc_destroy %p\n", mdsc);
 
+       if (!mdsc)
+               return;
+
        /* flush out any connection work with references to us */
        ceph_msgr_flush();
 
@@ -4077,6 +4103,9 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
        case CEPH_MSG_CLIENT_LEASE:
                handle_lease(mdsc, s, msg);
                break;
+       case CEPH_MSG_CLIENT_QUOTA:
+               ceph_handle_quota(mdsc, s, msg);
+               break;
 
        default:
                pr_err("received unknown message type %d %s\n", type,
index 71e3b783ee6fae5a64ae0a4a3eb1471c9fd9ed44..2ec3b5b350671a1fa1e574ae67d6a67567762dd8 100644 (file)
@@ -49,6 +49,8 @@ struct ceph_mds_reply_info_in {
        char *inline_data;
        u32 pool_ns_len;
        char *pool_ns_data;
+       u64 max_bytes;
+       u64 max_files;
 };
 
 struct ceph_mds_reply_dir_entry {
@@ -312,6 +314,8 @@ struct ceph_mds_client {
        int                     max_sessions;  /* len of s_mds_sessions */
        int                     stopping;      /* true if shutting down */
 
+       atomic64_t              quotarealms_count; /* # realms with quota */
+
        /*
         * snap_rwsem will cover cap linkage into snaprealms, and
         * realm snap contexts.  (later, we can do per-realm snap
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
new file mode 100644 (file)
index 0000000..242bfa5
--- /dev/null
@@ -0,0 +1,361 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * quota.c - CephFS quota
+ *
+ * Copyright (C) 2017-2018 SUSE
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/statfs.h>
+
+#include "super.h"
+#include "mds_client.h"
+
+void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
+{
+       struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+       if (inc)
+               atomic64_inc(&mdsc->quotarealms_count);
+       else
+               atomic64_dec(&mdsc->quotarealms_count);
+}
+
+static inline bool ceph_has_realms_with_quotas(struct inode *inode)
+{
+       struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+       return atomic64_read(&mdsc->quotarealms_count) > 0;
+}
+
+void ceph_handle_quota(struct ceph_mds_client *mdsc,
+                      struct ceph_mds_session *session,
+                      struct ceph_msg *msg)
+{
+       struct super_block *sb = mdsc->fsc->sb;
+       struct ceph_mds_quota *h = msg->front.iov_base;
+       struct ceph_vino vino;
+       struct inode *inode;
+       struct ceph_inode_info *ci;
+
+       if (msg->front.iov_len != sizeof(*h)) {
+               pr_err("%s corrupt message mds%d len %d\n", __func__,
+                      session->s_mds, (int)msg->front.iov_len);
+               ceph_msg_dump(msg);
+               return;
+       }
+
+       /* increment msg sequence number */
+       mutex_lock(&session->s_mutex);
+       session->s_seq++;
+       mutex_unlock(&session->s_mutex);
+
+       /* lookup inode */
+       vino.ino = le64_to_cpu(h->ino);
+       vino.snap = CEPH_NOSNAP;
+       inode = ceph_find_inode(sb, vino);
+       if (!inode) {
+               pr_warn("Failed to find inode %llu\n", vino.ino);
+               return;
+       }
+       ci = ceph_inode(inode);
+
+       spin_lock(&ci->i_ceph_lock);
+       ci->i_rbytes = le64_to_cpu(h->rbytes);
+       ci->i_rfiles = le64_to_cpu(h->rfiles);
+       ci->i_rsubdirs = le64_to_cpu(h->rsubdirs);
+       __ceph_update_quota(ci, le64_to_cpu(h->max_bytes),
+                           le64_to_cpu(h->max_files));
+       spin_unlock(&ci->i_ceph_lock);
+
+       iput(inode);
+}
+
+/*
+ * This function walks through the snaprealm for an inode and returns the
+ * ceph_snap_realm for the first snaprealm that has quotas set (either max_files
+ * or max_bytes).  If the root is reached, return the root ceph_snap_realm
+ * instead.
+ *
+ * Note that the caller is responsible for calling ceph_put_snap_realm() on the
+ * returned realm.
+ */
+static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
+                                              struct inode *inode)
+{
+       struct ceph_inode_info *ci = NULL;
+       struct ceph_snap_realm *realm, *next;
+       struct inode *in;
+       bool has_quota;
+
+       if (ceph_snap(inode) != CEPH_NOSNAP)
+               return NULL;
+
+       realm = ceph_inode(inode)->i_snap_realm;
+       if (realm)
+               ceph_get_snap_realm(mdsc, realm);
+       else
+               pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) "
+                                  "null i_snap_realm\n", ceph_vinop(inode));
+       while (realm) {
+               spin_lock(&realm->inodes_with_caps_lock);
+               in = realm->inode ? igrab(realm->inode) : NULL;
+               spin_unlock(&realm->inodes_with_caps_lock);
+               if (!in)
+                       break;
+
+               ci = ceph_inode(in);
+               has_quota = __ceph_has_any_quota(ci);
+               iput(in);
+
+               next = realm->parent;
+               if (has_quota || !next)
+                      return realm;
+
+               ceph_get_snap_realm(mdsc, next);
+               ceph_put_snap_realm(mdsc, realm);
+               realm = next;
+       }
+       if (realm)
+               ceph_put_snap_realm(mdsc, realm);
+
+       return NULL;
+}
+
+bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
+{
+       struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc;
+       struct ceph_snap_realm *old_realm, *new_realm;
+       bool is_same;
+
+       down_read(&mdsc->snap_rwsem);
+       old_realm = get_quota_realm(mdsc, old);
+       new_realm = get_quota_realm(mdsc, new);
+       is_same = (old_realm == new_realm);
+       up_read(&mdsc->snap_rwsem);
+
+       if (old_realm)
+               ceph_put_snap_realm(mdsc, old_realm);
+       if (new_realm)
+               ceph_put_snap_realm(mdsc, new_realm);
+
+       return is_same;
+}
+
+enum quota_check_op {
+       QUOTA_CHECK_MAX_FILES_OP,       /* check quota max_files limit */
+       QUOTA_CHECK_MAX_BYTES_OP,       /* check quota max_bytes limit */
+       QUOTA_CHECK_MAX_BYTES_APPROACHING_OP    /* check if quota max_bytes
+                                                  limit is approaching */
+};
+
+/*
+ * check_quota_exceeded() will walk up the snaprealm hierarchy and, for each
+ * realm, it will execute quota check operation defined by the 'op' parameter.
+ * The snaprealm walk is interrupted if the quota check detects that the quota
+ * is exceeded or if the root inode is reached.
+ */
+static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
+                                loff_t delta)
+{
+       struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+       struct ceph_inode_info *ci;
+       struct ceph_snap_realm *realm, *next;
+       struct inode *in;
+       u64 max, rvalue;
+       bool exceeded = false;
+
+       if (ceph_snap(inode) != CEPH_NOSNAP)
+               return false;
+
+       down_read(&mdsc->snap_rwsem);
+       realm = ceph_inode(inode)->i_snap_realm;
+       if (realm)
+               ceph_get_snap_realm(mdsc, realm);
+       else
+               pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) "
+                                  "null i_snap_realm\n", ceph_vinop(inode));
+       while (realm) {
+               spin_lock(&realm->inodes_with_caps_lock);
+               in = realm->inode ? igrab(realm->inode) : NULL;
+               spin_unlock(&realm->inodes_with_caps_lock);
+               if (!in)
+                       break;
+
+               ci = ceph_inode(in);
+               spin_lock(&ci->i_ceph_lock);
+               if (op == QUOTA_CHECK_MAX_FILES_OP) {
+                       max = ci->i_max_files;
+                       rvalue = ci->i_rfiles + ci->i_rsubdirs;
+               } else {
+                       max = ci->i_max_bytes;
+                       rvalue = ci->i_rbytes;
+               }
+               spin_unlock(&ci->i_ceph_lock);
+               switch (op) {
+               case QUOTA_CHECK_MAX_FILES_OP:
+                       exceeded = (max && (rvalue >= max));
+                       break;
+               case QUOTA_CHECK_MAX_BYTES_OP:
+                       exceeded = (max && (rvalue + delta > max));
+                       break;
+               case QUOTA_CHECK_MAX_BYTES_APPROACHING_OP:
+                       if (max) {
+                               if (rvalue >= max)
+                                       exceeded = true;
+                               else {
+                                       /*
+                                        * when we're writing more than 1/16th
+                                        * of the available space
+                                        */
+                                       exceeded =
+                                               (((max - rvalue) >> 4) < delta);
+                               }
+                       }
+                       break;
+               default:
+                       /* Shouldn't happen */
+                       pr_warn("Invalid quota check op (%d)\n", op);
+                       exceeded = true; /* Just break the loop */
+               }
+               iput(in);
+
+               next = realm->parent;
+               if (exceeded || !next)
+                       break;
+               ceph_get_snap_realm(mdsc, next);
+               ceph_put_snap_realm(mdsc, realm);
+               realm = next;
+       }
+       ceph_put_snap_realm(mdsc, realm);
+       up_read(&mdsc->snap_rwsem);
+
+       return exceeded;
+}
+
+/*
+ * ceph_quota_is_max_files_exceeded - check if we can create a new file
+ * @inode:     directory where a new file is being created
+ *
+ * This function returns true if creating a new file would exceed the max_files
+ * quota.  It is necessary to walk through the snaprealm hierarchy (until the
+ * FS root) to check all realms with quotas set.
+ */
+bool ceph_quota_is_max_files_exceeded(struct inode *inode)
+{
+       if (!ceph_has_realms_with_quotas(inode))
+               return false;
+
+       WARN_ON(!S_ISDIR(inode->i_mode));
+
+       return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 0);
+}
+
+/*
+ * ceph_quota_is_max_bytes_exceeded - check if we can write to a file
+ * @inode:     inode being written
+ * @newsize:   new size if write succeeds
+ *
+ * This function returns true if the max_bytes quota doesn't allow the file
+ * size to reach @newsize; it returns false otherwise.
+ */
+bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize)
+{
+       loff_t size = i_size_read(inode);
+
+       if (!ceph_has_realms_with_quotas(inode))
+               return false;
+
+       /* return immediately if we're decreasing file size */
+       if (newsize <= size)
+               return false;
+
+       return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_OP, (newsize - size));
+}
+
+/*
+ * ceph_quota_is_max_bytes_approaching - check if we're reaching max_bytes
+ * @inode:     inode being written
+ * @newsize:   new size if write succeeds
+ *
+ * This function returns true if the new file size @newsize will be consuming
+ * more than 1/16th of the available quota space; it returns false otherwise.
+ */
+bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize)
+{
+       loff_t size = ceph_inode(inode)->i_reported_size;
+
+       if (!ceph_has_realms_with_quotas(inode))
+               return false;
+
+       /* return immediately if we're decreasing file size */
+       if (newsize <= size)
+               return false;
+
+       return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP,
+                                   (newsize - size));
+}
+
+/*
+ * ceph_quota_update_statfs - if root has quota update statfs with quota status
+ * @fsc:       filesystem client instance
+ * @buf:       statfs to update
+ *
+ * If the mounted filesystem root has max_bytes quota set, update the filesystem
+ * statistics with the quota status.
+ *
+ * This function returns true if the stats have been updated, false otherwise.
+ */
+bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
+{
+       struct ceph_mds_client *mdsc = fsc->mdsc;
+       struct ceph_inode_info *ci;
+       struct ceph_snap_realm *realm;
+       struct inode *in;
+       u64 total = 0, used, free;
+       bool is_updated = false;
+
+       down_read(&mdsc->snap_rwsem);
+       realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root));
+       up_read(&mdsc->snap_rwsem);
+       if (!realm)
+               return false;
+
+       spin_lock(&realm->inodes_with_caps_lock);
+       in = realm->inode ? igrab(realm->inode) : NULL;
+       spin_unlock(&realm->inodes_with_caps_lock);
+       if (in) {
+               ci = ceph_inode(in);
+               spin_lock(&ci->i_ceph_lock);
+               if (ci->i_max_bytes) {
+                       total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
+                       used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
+                       /* It is possible for a quota to be exceeded.
+                        * Report 'zero' in that case
+                        */
+                       free = total > used ? total - used : 0;
+               }
+               spin_unlock(&ci->i_ceph_lock);
+               if (total) {
+                       buf->f_blocks = total;
+                       buf->f_bfree = free;
+                       buf->f_bavail = free;
+                       is_updated = true;
+               }
+               iput(in);
+       }
+       ceph_put_snap_realm(mdsc, realm);
+
+       return is_updated;
+}
+
index 07cf95e6413d775d01c383f3d3d7e0e8e4879910..041c27ea8de155a0002bdb5af25eb2fc5f8e6efa 100644 (file)
@@ -931,6 +931,8 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
                        list_add(&ci->i_snap_realm_item,
                                 &realm->inodes_with_caps);
                        ci->i_snap_realm = realm;
+                       if (realm->ino == ci->i_vino.ino)
+                                realm->inode = inode;
                        spin_unlock(&realm->inodes_with_caps_lock);
 
                        spin_unlock(&ci->i_ceph_lock);
index fb2bc9c15a2378ceb5712269ab93e677c360f5d0..b33082e6878f1ca1ba8820716dafb1924d77855d 100644 (file)
@@ -76,9 +76,18 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
         */
        buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
        buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
-       buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
-       buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
-       buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
+
+       /*
+        * By default use root quota for stats; fallback to overall filesystem
+        * usage if using 'noquotadf' mount option or if the root dir doesn't
+        * have max_bytes quota set.
+        */
+       if (ceph_test_mount_opt(fsc, NOQUOTADF) ||
+           !ceph_quota_update_statfs(fsc, buf)) {
+               buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
+               buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
+               buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
+       }
 
        buf->f_files = le64_to_cpu(st.num_objects);
        buf->f_ffree = -1;
@@ -151,6 +160,8 @@ enum {
        Opt_acl,
 #endif
        Opt_noacl,
+       Opt_quotadf,
+       Opt_noquotadf,
 };
 
 static match_table_t fsopt_tokens = {
@@ -187,6 +198,8 @@ static match_table_t fsopt_tokens = {
        {Opt_acl, "acl"},
 #endif
        {Opt_noacl, "noacl"},
+       {Opt_quotadf, "quotadf"},
+       {Opt_noquotadf, "noquotadf"},
        {-1, NULL}
 };
 
@@ -314,13 +327,16 @@ static int parse_fsopt_token(char *c, void *private)
                break;
        case Opt_fscache:
                fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
+               kfree(fsopt->fscache_uniq);
+               fsopt->fscache_uniq = NULL;
                break;
        case Opt_nofscache:
                fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
+               kfree(fsopt->fscache_uniq);
+               fsopt->fscache_uniq = NULL;
                break;
        case Opt_poolperm:
                fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM;
-               printk ("pool perm");
                break;
        case Opt_nopoolperm:
                fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM;
@@ -331,6 +347,12 @@ static int parse_fsopt_token(char *c, void *private)
        case Opt_norequire_active_mds:
                fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT;
                break;
+       case Opt_quotadf:
+               fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF;
+               break;
+       case Opt_noquotadf:
+               fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF;
+               break;
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
        case Opt_acl:
                fsopt->sb_flags |= SB_POSIXACL;
@@ -513,13 +535,12 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
        if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
                seq_puts(m, ",nodcache");
        if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
-               if (fsopt->fscache_uniq)
-                       seq_printf(m, ",fsc=%s", fsopt->fscache_uniq);
-               else
-                       seq_puts(m, ",fsc");
+               seq_show_option(m, "fsc", fsopt->fscache_uniq);
        }
        if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
                seq_puts(m, ",nopoolperm");
+       if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF)
+               seq_puts(m, ",noquotadf");
 
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
        if (fsopt->sb_flags & SB_POSIXACL)
@@ -529,7 +550,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 #endif
 
        if (fsopt->mds_namespace)
-               seq_printf(m, ",mds_namespace=%s", fsopt->mds_namespace);
+               seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
        if (fsopt->wsize)
                seq_printf(m, ",wsize=%d", fsopt->wsize);
        if (fsopt->rsize != CEPH_MAX_READ_SIZE)
@@ -679,6 +700,7 @@ struct kmem_cache *ceph_cap_cachep;
 struct kmem_cache *ceph_cap_flush_cachep;
 struct kmem_cache *ceph_dentry_cachep;
 struct kmem_cache *ceph_file_cachep;
+struct kmem_cache *ceph_dir_file_cachep;
 
 static void ceph_inode_init_once(void *foo)
 {
@@ -698,8 +720,7 @@ static int __init init_caches(void)
        if (!ceph_inode_cachep)
                return -ENOMEM;
 
-       ceph_cap_cachep = KMEM_CACHE(ceph_cap,
-                                    SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
+       ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD);
        if (!ceph_cap_cachep)
                goto bad_cap;
        ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
@@ -716,6 +737,10 @@ static int __init init_caches(void)
        if (!ceph_file_cachep)
                goto bad_file;
 
+       ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD);
+       if (!ceph_dir_file_cachep)
+               goto bad_dir_file;
+
        error = ceph_fscache_register();
        if (error)
                goto bad_fscache;
@@ -723,6 +748,8 @@ static int __init init_caches(void)
        return 0;
 
 bad_fscache:
+       kmem_cache_destroy(ceph_dir_file_cachep);
+bad_dir_file:
        kmem_cache_destroy(ceph_file_cachep);
 bad_file:
        kmem_cache_destroy(ceph_dentry_cachep);
@@ -748,6 +775,7 @@ static void destroy_caches(void)
        kmem_cache_destroy(ceph_cap_flush_cachep);
        kmem_cache_destroy(ceph_dentry_cachep);
        kmem_cache_destroy(ceph_file_cachep);
+       kmem_cache_destroy(ceph_dir_file_cachep);
 
        ceph_fscache_unregister();
 }
index 1c2086e0fec27a60577c7a676b00fac67fb11102..a7077a0c989fb33cde837a41e6b4d8b9951d2c43 100644 (file)
@@ -39,6 +39,7 @@
 #define CEPH_MOUNT_OPT_FSCACHE         (1<<10) /* use fscache */
 #define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<11) /* no pool permission check */
 #define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<12) /* mount waits if no mds is up */
+#define CEPH_MOUNT_OPT_NOQUOTADF       (1<<13) /* no root dir quota in statfs */
 
 #define CEPH_MOUNT_OPT_DEFAULT    CEPH_MOUNT_OPT_DCACHE
 
@@ -310,6 +311,9 @@ struct ceph_inode_info {
        u64 i_rbytes, i_rfiles, i_rsubdirs;
        u64 i_files, i_subdirs;
 
+       /* quotas */
+       u64 i_max_bytes, i_max_files;
+
        struct rb_root i_fragtree;
        int i_fragtree_nsplits;
        struct mutex i_fragtree_mutex;
@@ -671,6 +675,10 @@ struct ceph_file_info {
 
        spinlock_t rw_contexts_lock;
        struct list_head rw_contexts;
+};
+
+struct ceph_dir_file_info {
+       struct ceph_file_info file_info;
 
        /* readdir: position within the dir */
        u32 frag;
@@ -748,6 +756,7 @@ struct ceph_readdir_cache_control {
  */
 struct ceph_snap_realm {
        u64 ino;
+       struct inode *inode;
        atomic_t nref;
        struct rb_node node;
 
@@ -1066,4 +1075,37 @@ extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
 extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
 extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client);
 
+/* quota.c */
+static inline bool __ceph_has_any_quota(struct ceph_inode_info *ci)
+{
+       return ci->i_max_files || ci->i_max_bytes;
+}
+
+extern void ceph_adjust_quota_realms_count(struct inode *inode, bool inc);
+
+static inline void __ceph_update_quota(struct ceph_inode_info *ci,
+                                      u64 max_bytes, u64 max_files)
+{
+       bool had_quota, has_quota;
+       had_quota = __ceph_has_any_quota(ci);
+       ci->i_max_bytes = max_bytes;
+       ci->i_max_files = max_files;
+       has_quota = __ceph_has_any_quota(ci);
+
+       if (had_quota != has_quota)
+               ceph_adjust_quota_realms_count(&ci->vfs_inode, has_quota);
+}
+
+extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
+                             struct ceph_mds_session *session,
+                             struct ceph_msg *msg);
+extern bool ceph_quota_is_max_files_exceeded(struct inode *inode);
+extern bool ceph_quota_is_same_realm(struct inode *old, struct inode *new);
+extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode,
+                                            loff_t newlen);
+extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode,
+                                               loff_t newlen);
+extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
+                                    struct kstatfs *buf);
+
 #endif /* _FS_CEPH_SUPER_H */
index e1c4e0b12b4cd1309af51bad916120c7f2273fc4..7e72348639e4bcbf523ec4bec3a589fbcd643c7c 100644 (file)
@@ -224,6 +224,31 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
                        (long)ci->i_rctime.tv_nsec);
 }
 
+/* quotas */
+
+static bool ceph_vxattrcb_quota_exists(struct ceph_inode_info *ci)
+{
+       return (ci->i_max_files || ci->i_max_bytes);
+}
+
+static size_t ceph_vxattrcb_quota(struct ceph_inode_info *ci, char *val,
+                                 size_t size)
+{
+       return snprintf(val, size, "max_bytes=%llu max_files=%llu",
+                       ci->i_max_bytes, ci->i_max_files);
+}
+
+static size_t ceph_vxattrcb_quota_max_bytes(struct ceph_inode_info *ci,
+                                           char *val, size_t size)
+{
+       return snprintf(val, size, "%llu", ci->i_max_bytes);
+}
+
+static size_t ceph_vxattrcb_quota_max_files(struct ceph_inode_info *ci,
+                                           char *val, size_t size)
+{
+       return snprintf(val, size, "%llu", ci->i_max_files);
+}
 
 #define CEPH_XATTR_NAME(_type, _name)  XATTR_CEPH_PREFIX #_type "." #_name
 #define CEPH_XATTR_NAME2(_type, _name, _name2) \
@@ -247,6 +272,15 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
                .hidden = true,                 \
                .exists_cb = ceph_vxattrcb_layout_exists,       \
        }
+#define XATTR_QUOTA_FIELD(_type, _name)                                        \
+       {                                                               \
+               .name = CEPH_XATTR_NAME(_type, _name),                  \
+               .name_size = sizeof(CEPH_XATTR_NAME(_type, _name)),     \
+               .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name,   \
+               .readonly = false,                                      \
+               .hidden = true,                                         \
+               .exists_cb = ceph_vxattrcb_quota_exists,                \
+       }
 
 static struct ceph_vxattr ceph_dir_vxattrs[] = {
        {
@@ -270,6 +304,16 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
        XATTR_NAME_CEPH(dir, rsubdirs),
        XATTR_NAME_CEPH(dir, rbytes),
        XATTR_NAME_CEPH(dir, rctime),
+       {
+               .name = "ceph.quota",
+               .name_size = sizeof("ceph.quota"),
+               .getxattr_cb = ceph_vxattrcb_quota,
+               .readonly = false,
+               .hidden = true,
+               .exists_cb = ceph_vxattrcb_quota_exists,
+       },
+       XATTR_QUOTA_FIELD(quota, max_bytes),
+       XATTR_QUOTA_FIELD(quota, max_files),
        { .name = NULL, 0 }     /* Required table terminator */
 };
 static size_t ceph_dir_vxattrs_name_size;      /* total size of all names */
index 7cee97b93a614736e77414162bdf46c432475a40..4bcd4e838b475071f6e054ab9c89891dbef3d6fe 100644 (file)
@@ -1987,11 +1987,10 @@ wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
        for (i = 0; i < found_pages; i++) {
                page = wdata->pages[i];
                /*
-                * At this point we hold neither mapping->tree_lock nor
-                * lock on the page itself: the page may be truncated or
-                * invalidated (changing page->mapping to NULL), or even
-                * swizzled back from swapper_space to tmpfs file
-                * mapping
+                * At this point we hold neither the i_pages lock nor the
+                * page lock: the page may be truncated or invalidated
+                * (changing page->mapping to NULL), or even swizzled
+                * back from swapper_space to tmpfs file mapping
                 */
 
                if (nr_pages == 0)
index 0276df90e86c588f1908409fee23c0152e0d63a6..aaec72ded1b63c9a2944a228d201be239f77ada1 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -73,16 +73,15 @@ fs_initcall(init_dax_wait_table);
 #define RADIX_DAX_ZERO_PAGE    (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
 #define RADIX_DAX_EMPTY                (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))
 
-static unsigned long dax_radix_sector(void *entry)
+static unsigned long dax_radix_pfn(void *entry)
 {
        return (unsigned long)entry >> RADIX_DAX_SHIFT;
 }
 
-static void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
+static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags)
 {
        return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
-                       ((unsigned long)sector << RADIX_DAX_SHIFT) |
-                       RADIX_DAX_ENTRY_LOCK);
+                       (pfn << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK);
 }
 
 static unsigned int dax_radix_order(void *entry)
@@ -159,11 +158,9 @@ static int wake_exceptional_entry_func(wait_queue_entry_t *wait, unsigned int mo
 }
 
 /*
- * We do not necessarily hold the mapping->tree_lock when we call this
- * function so it is possible that 'entry' is no longer a valid item in the
- * radix tree.  This is okay because all we really need to do is to find the
- * correct waitqueue where tasks might be waiting for that old 'entry' and
- * wake them.
+ * @entry may no longer be the entry at the index in the mapping.
+ * The important information it's conveying is whether the entry at
+ * this index used to be a PMD entry.
  */
 static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
                pgoff_t index, void *entry, bool wake_all)
@@ -175,7 +172,7 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 
        /*
         * Checking for locked entry and prepare_to_wait_exclusive() happens
-        * under mapping->tree_lock, ditto for entry handling in our callers.
+        * under the i_pages lock, ditto for entry handling in our callers.
         * So at this point all tasks that could have seen our entry locked
         * must be in the waitqueue and the following check will see them.
         */
@@ -184,41 +181,39 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 }
 
 /*
- * Check whether the given slot is locked. The function must be called with
- * mapping->tree_lock held
+ * Check whether the given slot is locked.  Must be called with the i_pages
+ * lock held.
  */
 static inline int slot_locked(struct address_space *mapping, void **slot)
 {
        unsigned long entry = (unsigned long)
-               radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+               radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
        return entry & RADIX_DAX_ENTRY_LOCK;
 }
 
 /*
- * Mark the given slot is locked. The function must be called with
- * mapping->tree_lock held
+ * Mark the given slot as locked.  Must be called with the i_pages lock held.
  */
 static inline void *lock_slot(struct address_space *mapping, void **slot)
 {
        unsigned long entry = (unsigned long)
-               radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+               radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
 
        entry |= RADIX_DAX_ENTRY_LOCK;
-       radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
+       radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
        return (void *)entry;
 }
 
 /*
- * Mark the given slot is unlocked. The function must be called with
- * mapping->tree_lock held
+ * Mark the given slot as unlocked.  Must be called with the i_pages lock held.
  */
 static inline void *unlock_slot(struct address_space *mapping, void **slot)
 {
        unsigned long entry = (unsigned long)
-               radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+               radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
 
        entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
-       radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
+       radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
        return (void *)entry;
 }
 
@@ -229,7 +224,7 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
  * put_locked_mapping_entry() when he locked the entry and now wants to
  * unlock it.
  *
- * The function must be called with mapping->tree_lock held.
+ * Must be called with the i_pages lock held.
  */
 static void *get_unlocked_mapping_entry(struct address_space *mapping,
                                        pgoff_t index, void ***slotp)
@@ -242,7 +237,7 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
        ewait.wait.func = wake_exceptional_entry_func;
 
        for (;;) {
-               entry = __radix_tree_lookup(&mapping->page_tree, index, NULL,
+               entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
                                          &slot);
                if (!entry ||
                    WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)) ||
@@ -255,10 +250,10 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
                wq = dax_entry_waitqueue(mapping, index, entry, &ewait.key);
                prepare_to_wait_exclusive(wq, &ewait.wait,
                                          TASK_UNINTERRUPTIBLE);
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
                schedule();
                finish_wait(wq, &ewait.wait);
-               spin_lock_irq(&mapping->tree_lock);
+               xa_lock_irq(&mapping->i_pages);
        }
 }
 
@@ -267,15 +262,15 @@ static void dax_unlock_mapping_entry(struct address_space *mapping,
 {
        void *entry, **slot;
 
-       spin_lock_irq(&mapping->tree_lock);
-       entry = __radix_tree_lookup(&mapping->page_tree, index, NULL, &slot);
+       xa_lock_irq(&mapping->i_pages);
+       entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot);
        if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
                         !slot_locked(mapping, slot))) {
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
                return;
        }
        unlock_slot(mapping, slot);
-       spin_unlock_irq(&mapping->tree_lock);
+       xa_unlock_irq(&mapping->i_pages);
        dax_wake_mapping_entry_waiter(mapping, index, entry, false);
 }
 
@@ -299,6 +294,63 @@ static void put_unlocked_mapping_entry(struct address_space *mapping,
        dax_wake_mapping_entry_waiter(mapping, index, entry, false);
 }
 
+static unsigned long dax_entry_size(void *entry)
+{
+       if (dax_is_zero_entry(entry))
+               return 0;
+       else if (dax_is_empty_entry(entry))
+               return 0;
+       else if (dax_is_pmd_entry(entry))
+               return PMD_SIZE;
+       else
+               return PAGE_SIZE;
+}
+
+static unsigned long dax_radix_end_pfn(void *entry)
+{
+       return dax_radix_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
+}
+
+/*
+ * Iterate through all mapped pfns represented by an entry, i.e. skip
+ * 'empty' and 'zero' entries.
+ */
+#define for_each_mapped_pfn(entry, pfn) \
+       for (pfn = dax_radix_pfn(entry); \
+                       pfn < dax_radix_end_pfn(entry); pfn++)
+
+static void dax_associate_entry(void *entry, struct address_space *mapping)
+{
+       unsigned long pfn;
+
+       if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+               return;
+
+       for_each_mapped_pfn(entry, pfn) {
+               struct page *page = pfn_to_page(pfn);
+
+               WARN_ON_ONCE(page->mapping);
+               page->mapping = mapping;
+       }
+}
+
+static void dax_disassociate_entry(void *entry, struct address_space *mapping,
+               bool trunc)
+{
+       unsigned long pfn;
+
+       if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+               return;
+
+       for_each_mapped_pfn(entry, pfn) {
+               struct page *page = pfn_to_page(pfn);
+
+               WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
+               WARN_ON_ONCE(page->mapping && page->mapping != mapping);
+               page->mapping = NULL;
+       }
+}
+
 /*
  * Find radix tree entry at given index. If it points to an exceptional entry,
  * return it with the radix tree entry locked. If the radix tree doesn't
@@ -332,7 +384,7 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
        void *entry, **slot;
 
 restart:
-       spin_lock_irq(&mapping->tree_lock);
+       xa_lock_irq(&mapping->i_pages);
        entry = get_unlocked_mapping_entry(mapping, index, &slot);
 
        if (WARN_ON_ONCE(entry && !radix_tree_exceptional_entry(entry))) {
@@ -364,12 +416,12 @@ restart:
                if (pmd_downgrade) {
                        /*
                         * Make sure 'entry' remains valid while we drop
-                        * mapping->tree_lock.
+                        * the i_pages lock.
                         */
                        entry = lock_slot(mapping, slot);
                }
 
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
                /*
                 * Besides huge zero pages the only other thing that gets
                 * downgraded are empty entries which don't need to be
@@ -386,26 +438,27 @@ restart:
                                put_locked_mapping_entry(mapping, index);
                        return ERR_PTR(err);
                }
-               spin_lock_irq(&mapping->tree_lock);
+               xa_lock_irq(&mapping->i_pages);
 
                if (!entry) {
                        /*
-                        * We needed to drop the page_tree lock while calling
+                        * We needed to drop the i_pages lock while calling
                         * radix_tree_preload() and we didn't have an entry to
                         * lock.  See if another thread inserted an entry at
                         * our index during this time.
                         */
-                       entry = __radix_tree_lookup(&mapping->page_tree, index,
+                       entry = __radix_tree_lookup(&mapping->i_pages, index,
                                        NULL, &slot);
                        if (entry) {
                                radix_tree_preload_end();
-                               spin_unlock_irq(&mapping->tree_lock);
+                               xa_unlock_irq(&mapping->i_pages);
                                goto restart;
                        }
                }
 
                if (pmd_downgrade) {
-                       radix_tree_delete(&mapping->page_tree, index);
+                       dax_disassociate_entry(entry, mapping, false);
+                       radix_tree_delete(&mapping->i_pages, index);
                        mapping->nrexceptional--;
                        dax_wake_mapping_entry_waiter(mapping, index, entry,
                                        true);
@@ -413,11 +466,11 @@ restart:
 
                entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY);
 
-               err = __radix_tree_insert(&mapping->page_tree, index,
+               err = __radix_tree_insert(&mapping->i_pages, index,
                                dax_radix_order(entry), entry);
                radix_tree_preload_end();
                if (err) {
-                       spin_unlock_irq(&mapping->tree_lock);
+                       xa_unlock_irq(&mapping->i_pages);
                        /*
                         * Our insertion of a DAX entry failed, most likely
                         * because we were inserting a PMD entry and it
@@ -430,12 +483,12 @@ restart:
                }
                /* Good, we have inserted empty locked entry into the tree. */
                mapping->nrexceptional++;
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
                return entry;
        }
        entry = lock_slot(mapping, slot);
  out_unlock:
-       spin_unlock_irq(&mapping->tree_lock);
+       xa_unlock_irq(&mapping->i_pages);
        return entry;
 }
 
@@ -444,22 +497,23 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping,
 {
        int ret = 0;
        void *entry;
-       struct radix_tree_root *page_tree = &mapping->page_tree;
+       struct radix_tree_root *pages = &mapping->i_pages;
 
-       spin_lock_irq(&mapping->tree_lock);
+       xa_lock_irq(pages);
        entry = get_unlocked_mapping_entry(mapping, index, NULL);
        if (!entry || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)))
                goto out;
        if (!trunc &&
-           (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
-            radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)))
+           (radix_tree_tag_get(pages, index, PAGECACHE_TAG_DIRTY) ||
+            radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE)))
                goto out;
-       radix_tree_delete(page_tree, index);
+       dax_disassociate_entry(entry, mapping, trunc);
+       radix_tree_delete(pages, index);
        mapping->nrexceptional--;
        ret = 1;
 out:
        put_unlocked_mapping_entry(mapping, index, entry);
-       spin_unlock_irq(&mapping->tree_lock);
+       xa_unlock_irq(pages);
        return ret;
 }
 /*
@@ -526,12 +580,13 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
  */
 static void *dax_insert_mapping_entry(struct address_space *mapping,
                                      struct vm_fault *vmf,
-                                     void *entry, sector_t sector,
+                                     void *entry, pfn_t pfn_t,
                                      unsigned long flags, bool dirty)
 {
-       struct radix_tree_root *page_tree = &mapping->page_tree;
-       void *new_entry;
+       struct radix_tree_root *pages = &mapping->i_pages;
+       unsigned long pfn = pfn_t_to_pfn(pfn_t);
        pgoff_t index = vmf->pgoff;
+       void *new_entry;
 
        if (dirty)
                __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
@@ -545,8 +600,12 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
                        unmap_mapping_pages(mapping, vmf->pgoff, 1, false);
        }
 
-       spin_lock_irq(&mapping->tree_lock);
-       new_entry = dax_radix_locked_entry(sector, flags);
+       xa_lock_irq(pages);
+       new_entry = dax_radix_locked_entry(pfn, flags);
+       if (dax_entry_size(entry) != dax_entry_size(new_entry)) {
+               dax_disassociate_entry(entry, mapping, false);
+               dax_associate_entry(new_entry, mapping);
+       }
 
        if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
                /*
@@ -561,17 +620,17 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
                void **slot;
                void *ret;
 
-               ret = __radix_tree_lookup(page_tree, index, &node, &slot);
+               ret = __radix_tree_lookup(pages, index, &node, &slot);
                WARN_ON_ONCE(ret != entry);
-               __radix_tree_replace(page_tree, node, slot,
+               __radix_tree_replace(pages, node, slot,
                                     new_entry, NULL);
                entry = new_entry;
        }
 
        if (dirty)
-               radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
+               radix_tree_tag_set(pages, index, PAGECACHE_TAG_DIRTY);
 
-       spin_unlock_irq(&mapping->tree_lock);
+       xa_unlock_irq(pages);
        return entry;
 }
 
@@ -657,17 +716,14 @@ unlock_pte:
        i_mmap_unlock_read(mapping);
 }
 
-static int dax_writeback_one(struct block_device *bdev,
-               struct dax_device *dax_dev, struct address_space *mapping,
-               pgoff_t index, void *entry)
+static int dax_writeback_one(struct dax_device *dax_dev,
+               struct address_space *mapping, pgoff_t index, void *entry)
 {
-       struct radix_tree_root *page_tree = &mapping->page_tree;
-       void *entry2, **slot, *kaddr;
-       long ret = 0, id;
-       sector_t sector;
-       pgoff_t pgoff;
+       struct radix_tree_root *pages = &mapping->i_pages;
+       void *entry2, **slot;
+       unsigned long pfn;
+       long ret = 0;
        size_t size;
-       pfn_t pfn;
 
        /*
         * A page got tagged dirty in DAX mapping? Something is seriously
@@ -676,17 +732,17 @@ static int dax_writeback_one(struct block_device *bdev,
        if (WARN_ON(!radix_tree_exceptional_entry(entry)))
                return -EIO;
 
-       spin_lock_irq(&mapping->tree_lock);
+       xa_lock_irq(pages);
        entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
        /* Entry got punched out / reallocated? */
        if (!entry2 || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry2)))
                goto put_unlocked;
        /*
         * Entry got reallocated elsewhere? No need to writeback. We have to
-        * compare sectors as we must not bail out due to difference in lockbit
+        * compare pfns as we must not bail out due to difference in lockbit
         * or entry type.
         */
-       if (dax_radix_sector(entry2) != dax_radix_sector(entry))
+       if (dax_radix_pfn(entry2) != dax_radix_pfn(entry))
                goto put_unlocked;
        if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
                                dax_is_zero_entry(entry))) {
@@ -695,7 +751,7 @@ static int dax_writeback_one(struct block_device *bdev,
        }
 
        /* Another fsync thread may have already written back this entry */
-       if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+       if (!radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE))
                goto put_unlocked;
        /* Lock the entry to serialize with page faults */
        entry = lock_slot(mapping, slot);
@@ -703,60 +759,40 @@ static int dax_writeback_one(struct block_device *bdev,
         * We can clear the tag now but we have to be careful so that concurrent
         * dax_writeback_one() calls for the same index cannot finish before we
         * actually flush the caches. This is achieved as the calls will look
-        * at the entry only under tree_lock and once they do that they will
-        * see the entry locked and wait for it to unlock.
+        * at the entry only under the i_pages lock and once they do that
+        * they will see the entry locked and wait for it to unlock.
         */
-       radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
-       spin_unlock_irq(&mapping->tree_lock);
+       radix_tree_tag_clear(pages, index, PAGECACHE_TAG_TOWRITE);
+       xa_unlock_irq(pages);
 
        /*
         * Even if dax_writeback_mapping_range() was given a wbc->range_start
         * in the middle of a PMD, the 'index' we are given will be aligned to
-        * the start index of the PMD, as will the sector we pull from
-        * 'entry'.  This allows us to flush for PMD_SIZE and not have to
-        * worry about partial PMD writebacks.
+        * the start index of the PMD, as will the pfn we pull from 'entry'.
+        * This allows us to flush for PMD_SIZE and not have to worry about
+        * partial PMD writebacks.
         */
-       sector = dax_radix_sector(entry);
+       pfn = dax_radix_pfn(entry);
        size = PAGE_SIZE << dax_radix_order(entry);
 
-       id = dax_read_lock();
-       ret = bdev_dax_pgoff(bdev, sector, size, &pgoff);
-       if (ret)
-               goto dax_unlock;
-
-       /*
-        * dax_direct_access() may sleep, so cannot hold tree_lock over
-        * its invocation.
-        */
-       ret = dax_direct_access(dax_dev, pgoff, size / PAGE_SIZE, &kaddr, &pfn);
-       if (ret < 0)
-               goto dax_unlock;
-
-       if (WARN_ON_ONCE(ret < size / PAGE_SIZE)) {
-               ret = -EIO;
-               goto dax_unlock;
-       }
-
-       dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn));
-       dax_flush(dax_dev, kaddr, size);
+       dax_mapping_entry_mkclean(mapping, index, pfn);
+       dax_flush(dax_dev, page_address(pfn_to_page(pfn)), size);
        /*
         * After we have flushed the cache, we can clear the dirty tag. There
         * cannot be new dirty data in the pfn after the flush has completed as
         * the pfn mappings are writeprotected and fault waits for mapping
         * entry lock.
         */
-       spin_lock_irq(&mapping->tree_lock);
-       radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_DIRTY);
-       spin_unlock_irq(&mapping->tree_lock);
+       xa_lock_irq(pages);
+       radix_tree_tag_clear(pages, index, PAGECACHE_TAG_DIRTY);
+       xa_unlock_irq(pages);
        trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT);
- dax_unlock:
-       dax_read_unlock(id);
        put_locked_mapping_entry(mapping, index);
        return ret;
 
  put_unlocked:
        put_unlocked_mapping_entry(mapping, index, entry2);
-       spin_unlock_irq(&mapping->tree_lock);
+       xa_unlock_irq(pages);
        return ret;
 }
 
@@ -808,8 +844,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
                                break;
                        }
 
-                       ret = dax_writeback_one(bdev, dax_dev, mapping,
-                                       indices[i], pvec.pages[i]);
+                       ret = dax_writeback_one(dax_dev, mapping, indices[i],
+                                       pvec.pages[i]);
                        if (ret < 0) {
                                mapping_set_error(mapping, ret);
                                goto out;
@@ -877,6 +913,7 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
        int ret = VM_FAULT_NOPAGE;
        struct page *zero_page;
        void *entry2;
+       pfn_t pfn;
 
        zero_page = ZERO_PAGE(0);
        if (unlikely(!zero_page)) {
@@ -884,14 +921,15 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
                goto out;
        }
 
-       entry2 = dax_insert_mapping_entry(mapping, vmf, entry, 0,
+       pfn = page_to_pfn_t(zero_page);
+       entry2 = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
                        RADIX_DAX_ZERO_PAGE, false);
        if (IS_ERR(entry2)) {
                ret = VM_FAULT_SIGBUS;
                goto out;
        }
 
-       vm_insert_mixed(vmf->vma, vaddr, page_to_pfn_t(zero_page));
+       vm_insert_mixed(vmf->vma, vaddr, pfn);
 out:
        trace_dax_load_hole(inode, vmf, ret);
        return ret;
@@ -1200,8 +1238,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
                if (error < 0)
                        goto error_finish_iomap;
 
-               entry = dax_insert_mapping_entry(mapping, vmf, entry,
-                                                dax_iomap_sector(&iomap, pos),
+               entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
                                                 0, write && !sync);
                if (IS_ERR(entry)) {
                        error = PTR_ERR(entry);
@@ -1280,13 +1317,15 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
        void *ret = NULL;
        spinlock_t *ptl;
        pmd_t pmd_entry;
+       pfn_t pfn;
 
        zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);
 
        if (unlikely(!zero_page))
                goto fallback;
 
-       ret = dax_insert_mapping_entry(mapping, vmf, entry, 0,
+       pfn = page_to_pfn_t(zero_page);
+       ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
                        RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
        if (IS_ERR(ret))
                goto fallback;
@@ -1409,8 +1448,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
                if (error < 0)
                        goto finish_iomap;
 
-               entry = dax_insert_mapping_entry(mapping, vmf, entry,
-                                               dax_iomap_sector(&iomap, pos),
+               entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
                                                RADIX_DAX_PMD, write && !sync);
                if (IS_ERR(entry))
                        goto finish_iomap;
@@ -1524,21 +1562,21 @@ static int dax_insert_pfn_mkwrite(struct vm_fault *vmf,
        pgoff_t index = vmf->pgoff;
        int vmf_ret, error;
 
-       spin_lock_irq(&mapping->tree_lock);
+       xa_lock_irq(&mapping->i_pages);
        entry = get_unlocked_mapping_entry(mapping, index, &slot);
        /* Did we race with someone splitting entry or so? */
        if (!entry ||
            (pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) ||
            (pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) {
                put_unlocked_mapping_entry(mapping, index, entry);
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
                trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
                                                      VM_FAULT_NOPAGE);
                return VM_FAULT_NOPAGE;
        }
-       radix_tree_tag_set(&mapping->page_tree, index, PAGECACHE_TAG_DIRTY);
+       radix_tree_tag_set(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY);
        entry = lock_slot(mapping, slot);
-       spin_unlock_irq(&mapping->tree_lock);
+       xa_unlock_irq(&mapping->i_pages);
        switch (pe_size) {
        case PE_SIZE_PTE:
                error = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
index 593079176123823e0ddcf19ce72257e19fdc923e..86d2de63461e1550efe205643d66800dd12f15ac 100644 (file)
@@ -257,11 +257,25 @@ static void __d_free(struct rcu_head *head)
        kmem_cache_free(dentry_cache, dentry); 
 }
 
+static void __d_free_external_name(struct rcu_head *head)
+{
+       struct external_name *name = container_of(head, struct external_name,
+                                                 u.head);
+
+       mod_node_page_state(page_pgdat(virt_to_page(name)),
+                           NR_INDIRECTLY_RECLAIMABLE_BYTES,
+                           -ksize(name));
+
+       kfree(name);
+}
+
 static void __d_free_external(struct rcu_head *head)
 {
        struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
-       kfree(external_name(dentry));
-       kmem_cache_free(dentry_cache, dentry); 
+
+       __d_free_external_name(&external_name(dentry)->u.head);
+
+       kmem_cache_free(dentry_cache, dentry);
 }
 
 static inline int dname_external(const struct dentry *dentry)
@@ -291,7 +305,7 @@ void release_dentry_name_snapshot(struct name_snapshot *name)
                struct external_name *p;
                p = container_of(name->name, struct external_name, name[0]);
                if (unlikely(atomic_dec_and_test(&p->u.count)))
-                       kfree_rcu(p, u.head);
+                       call_rcu(&p->u.head, __d_free_external_name);
        }
 }
 EXPORT_SYMBOL(release_dentry_name_snapshot);
@@ -1038,6 +1052,8 @@ static void shrink_dentry_list(struct list_head *list)
        while (!list_empty(list)) {
                struct dentry *dentry, *parent;
 
+               cond_resched();
+
                dentry = list_entry(list->prev, struct dentry, d_lru);
                spin_lock(&dentry->d_lock);
                rcu_read_lock();
@@ -1191,7 +1207,6 @@ void shrink_dcache_sb(struct super_block *sb)
 
                this_cpu_sub(nr_dentry_unused, freed);
                shrink_dentry_list(&dispose);
-               cond_resched();
        } while (list_lru_count(&sb->s_dentry_lru) > 0);
 }
 EXPORT_SYMBOL(shrink_dcache_sb);
@@ -1473,7 +1488,6 @@ void shrink_dcache_parent(struct dentry *parent)
                        break;
 
                shrink_dentry_list(&data.dispose);
-               cond_resched();
        }
 }
 EXPORT_SYMBOL(shrink_dcache_parent);
@@ -1600,7 +1614,6 @@ void d_invalidate(struct dentry *dentry)
                        detach_mounts(data.mountpoint);
                        dput(data.mountpoint);
                }
-               cond_resched();
        }
 }
 EXPORT_SYMBOL(d_invalidate);
@@ -1617,6 +1630,7 @@ EXPORT_SYMBOL(d_invalidate);
  
 struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 {
+       struct external_name *ext = NULL;
        struct dentry *dentry;
        char *dname;
        int err;
@@ -1637,14 +1651,14 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
                dname = dentry->d_iname;
        } else if (name->len > DNAME_INLINE_LEN-1) {
                size_t size = offsetof(struct external_name, name[1]);
-               struct external_name *p = kmalloc(size + name->len,
-                                                 GFP_KERNEL_ACCOUNT);
-               if (!p) {
+
+               ext = kmalloc(size + name->len, GFP_KERNEL_ACCOUNT);
+               if (!ext) {
                        kmem_cache_free(dentry_cache, dentry); 
                        return NULL;
                }
-               atomic_set(&p->u.count, 1);
-               dname = p->name;
+               atomic_set(&ext->u.count, 1);
+               dname = ext->name;
        } else  {
                dname = dentry->d_iname;
        }       
@@ -1683,6 +1697,12 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
                }
        }
 
+       if (unlikely(ext)) {
+               pg_data_t *pgdat = page_pgdat(virt_to_page(ext));
+               mod_node_page_state(pgdat, NR_INDIRECTLY_RECLAIMABLE_BYTES,
+                                   ksize(ext));
+       }
+
        this_cpu_inc(nr_dentry);
 
        return dentry;
@@ -2770,7 +2790,7 @@ static void copy_name(struct dentry *dentry, struct dentry *target)
                dentry->d_name.hash_len = target->d_name.hash_len;
        }
        if (old_name && likely(atomic_dec_and_test(&old_name->u.count)))
-               kfree_rcu(old_name, u.head);
+               call_rcu(&old_name->u.head, __d_free_external_name);
 }
 
 /*
index a919a827d1811ebc6306205c957991b541cafe4e..183059c427b9c5552fc29d5eb01f90891930240f 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -257,7 +257,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
                 *    to work from.
                 */
                limit = _STK_LIM / 4 * 3;
-               limit = min(limit, rlimit(RLIMIT_STACK) / 4);
+               limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
                if (size > limit)
                        goto fail;
        }
@@ -411,6 +411,11 @@ static int bprm_mm_init(struct linux_binprm *bprm)
        if (!mm)
                goto err;
 
+       /* Save current stack limit for all calculations made during exec. */
+       task_lock(current->group_leader);
+       bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK];
+       task_unlock(current->group_leader);
+
        err = __bprm_mm_init(bprm);
        if (err)
                goto err;
@@ -697,7 +702,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
 
 #ifdef CONFIG_STACK_GROWSUP
        /* Limit stack size */
-       stack_base = rlimit_max(RLIMIT_STACK);
+       stack_base = bprm->rlim_stack.rlim_max;
        if (stack_base > STACK_SIZE_MAX)
                stack_base = STACK_SIZE_MAX;
 
@@ -770,7 +775,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
         * Align this down to a page boundary as expand_stack
         * will align it up.
         */
-       rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK;
+       rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK;
 #ifdef CONFIG_STACK_GROWSUP
        if (stack_size + stack_expand > rlim_stack)
                stack_base = vma->vm_start + rlim_stack;
@@ -1341,11 +1346,11 @@ void setup_new_exec(struct linux_binprm * bprm)
                 * RLIMIT_STACK, but after the point of no return to avoid
                 * needing to clean up the change on failure.
                 */
-               if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM)
-                       current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
+               if (bprm->rlim_stack.rlim_cur > _STK_LIM)
+                       bprm->rlim_stack.rlim_cur = _STK_LIM;
        }
 
-       arch_pick_mmap_layout(current->mm);
+       arch_pick_mmap_layout(current->mm, &bprm->rlim_stack);
 
        current->sas_ss_sp = current->sas_ss_size = 0;
 
@@ -1378,6 +1383,16 @@ void setup_new_exec(struct linux_binprm * bprm)
 }
 EXPORT_SYMBOL(setup_new_exec);
 
+/* Runs immediately before start_thread() takes over. */
+void finalize_exec(struct linux_binprm *bprm)
+{
+       /* Store any stack rlimit changes before starting thread. */
+       task_lock(current->group_leader);
+       current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack;
+       task_unlock(current->group_leader);
+}
+EXPORT_SYMBOL(finalize_exec);
+
 /*
  * Prepare credentials and lock ->cred_guard_mutex.
  * install_exec_creds() commits the new creds and drops the lock.
index 032295e1d3865383342f843b99b4c2c0e972078c..cc40802ddfa856d14aefc8ef75ec9e61b89864b0 100644 (file)
@@ -814,6 +814,7 @@ extern const struct inode_operations ext2_file_inode_operations;
 extern const struct file_operations ext2_file_operations;
 
 /* inode.c */
+extern void ext2_set_file_ops(struct inode *inode);
 extern const struct address_space_operations ext2_aops;
 extern const struct address_space_operations ext2_nobh_aops;
 extern const struct iomap_ops ext2_iomap_ops;
index 9b2ac55ac34f0c7d1ccab9d95bedd7e482da66fd..1e01fabef130a602ef82e17a6e77f158534c4bd7 100644 (file)
@@ -940,9 +940,6 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
        loff_t offset = iocb->ki_pos;
        ssize_t ret;
 
-       if (WARN_ON_ONCE(IS_DAX(inode)))
-               return -EIO;
-
        ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
        if (ret < 0 && iov_iter_rw(iter) == WRITE)
                ext2_write_failed(mapping, offset + count);
@@ -952,17 +949,16 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 static int
 ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
-#ifdef CONFIG_FS_DAX
-       if (dax_mapping(mapping)) {
-               return dax_writeback_mapping_range(mapping,
-                                                  mapping->host->i_sb->s_bdev,
-                                                  wbc);
-       }
-#endif
-
        return mpage_writepages(mapping, wbc, ext2_get_block);
 }
 
+static int
+ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc)
+{
+       return dax_writeback_mapping_range(mapping,
+                       mapping->host->i_sb->s_bdev, wbc);
+}
+
 const struct address_space_operations ext2_aops = {
        .readpage               = ext2_readpage,
        .readpages              = ext2_readpages,
@@ -990,6 +986,13 @@ const struct address_space_operations ext2_nobh_aops = {
        .error_remove_page      = generic_error_remove_page,
 };
 
+static const struct address_space_operations ext2_dax_aops = {
+       .writepages             = ext2_dax_writepages,
+       .direct_IO              = noop_direct_IO,
+       .set_page_dirty         = noop_set_page_dirty,
+       .invalidatepage         = noop_invalidatepage,
+};
+
 /*
  * Probably it should be a library function... search for first non-zero word
  * or memcmp with zero_page, whatever is better for particular architecture.
@@ -1388,6 +1391,18 @@ void ext2_set_inode_flags(struct inode *inode)
                inode->i_flags |= S_DAX;
 }
 
+void ext2_set_file_ops(struct inode *inode)
+{
+       inode->i_op = &ext2_file_inode_operations;
+       inode->i_fop = &ext2_file_operations;
+       if (IS_DAX(inode))
+               inode->i_mapping->a_ops = &ext2_dax_aops;
+       else if (test_opt(inode->i_sb, NOBH))
+               inode->i_mapping->a_ops = &ext2_nobh_aops;
+       else
+               inode->i_mapping->a_ops = &ext2_aops;
+}
+
 struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
 {
        struct ext2_inode_info *ei;
@@ -1480,14 +1495,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
                ei->i_data[n] = raw_inode->i_block[n];
 
        if (S_ISREG(inode->i_mode)) {
-               inode->i_op = &ext2_file_inode_operations;
-               if (test_opt(inode->i_sb, NOBH)) {
-                       inode->i_mapping->a_ops = &ext2_nobh_aops;
-                       inode->i_fop = &ext2_file_operations;
-               } else {
-                       inode->i_mapping->a_ops = &ext2_aops;
-                       inode->i_fop = &ext2_file_operations;
-               }
+               ext2_set_file_ops(inode);
        } else if (S_ISDIR(inode->i_mode)) {
                inode->i_op = &ext2_dir_inode_operations;
                inode->i_fop = &ext2_dir_operations;
index e078075dc66faaa80ce377118c3750246fc64da8..55f7caadb09333a1d73603f839de3fa5df2f9e42 100644 (file)
@@ -107,14 +107,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode
        if (IS_ERR(inode))
                return PTR_ERR(inode);
 
-       inode->i_op = &ext2_file_inode_operations;
-       if (test_opt(inode->i_sb, NOBH)) {
-               inode->i_mapping->a_ops = &ext2_nobh_aops;
-               inode->i_fop = &ext2_file_operations;
-       } else {
-               inode->i_mapping->a_ops = &ext2_aops;
-               inode->i_fop = &ext2_file_operations;
-       }
+       ext2_set_file_ops(inode);
        mark_inode_dirty(inode);
        return ext2_add_nondir(dentry, inode);
 }
@@ -125,14 +118,7 @@ static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
        if (IS_ERR(inode))
                return PTR_ERR(inode);
 
-       inode->i_op = &ext2_file_inode_operations;
-       if (test_opt(inode->i_sb, NOBH)) {
-               inode->i_mapping->a_ops = &ext2_nobh_aops;
-               inode->i_fop = &ext2_file_operations;
-       } else {
-               inode->i_mapping->a_ops = &ext2_aops;
-               inode->i_fop = &ext2_file_operations;
-       }
+       ext2_set_file_ops(inode);
        mark_inode_dirty(inode);
        d_tmpfile(dentry, inode);
        unlock_new_inode(inode);
index 129205028300d48f66793314a45070a4270bb69d..1e50c5efae67505af31e2b384b46952ea0b7a75a 100644 (file)
@@ -2716,12 +2716,6 @@ static int ext4_writepages(struct address_space *mapping,
        percpu_down_read(&sbi->s_journal_flag_rwsem);
        trace_ext4_writepages(inode, wbc);
 
-       if (dax_mapping(mapping)) {
-               ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
-                                                 wbc);
-               goto out_writepages;
-       }
-
        /*
         * No pages to write? This is mainly a kludge to avoid starting
         * a transaction for special inodes like journal inode on last iput()
@@ -2942,6 +2936,27 @@ out_writepages:
        return ret;
 }
 
+static int ext4_dax_writepages(struct address_space *mapping,
+                              struct writeback_control *wbc)
+{
+       int ret;
+       long nr_to_write = wbc->nr_to_write;
+       struct inode *inode = mapping->host;
+       struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
+
+       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+               return -EIO;
+
+       percpu_down_read(&sbi->s_journal_flag_rwsem);
+       trace_ext4_writepages(inode, wbc);
+
+       ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, wbc);
+       trace_ext4_writepages_result(inode, wbc, ret,
+                                    nr_to_write - wbc->nr_to_write);
+       percpu_up_read(&sbi->s_journal_flag_rwsem);
+       return ret;
+}
+
 static int ext4_nonda_switch(struct super_block *sb)
 {
        s64 free_clusters, dirty_clusters;
@@ -3845,10 +3860,6 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
        if (ext4_has_inline_data(inode))
                return 0;
 
-       /* DAX uses iomap path now */
-       if (WARN_ON_ONCE(IS_DAX(inode)))
-               return 0;
-
        trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
        if (iov_iter_rw(iter) == READ)
                ret = ext4_direct_IO_read(iocb, iter);
@@ -3934,6 +3945,13 @@ static const struct address_space_operations ext4_da_aops = {
        .error_remove_page      = generic_error_remove_page,
 };
 
+static const struct address_space_operations ext4_dax_aops = {
+       .writepages             = ext4_dax_writepages,
+       .direct_IO              = noop_direct_IO,
+       .set_page_dirty         = noop_set_page_dirty,
+       .invalidatepage         = noop_invalidatepage,
+};
+
 void ext4_set_aops(struct inode *inode)
 {
        switch (ext4_inode_journal_mode(inode)) {
@@ -3946,7 +3964,9 @@ void ext4_set_aops(struct inode *inode)
        default:
                BUG();
        }
-       if (test_opt(inode->i_sb, DELALLOC))
+       if (IS_DAX(inode))
+               inode->i_mapping->a_ops = &ext4_dax_aops;
+       else if (test_opt(inode->i_sb, DELALLOC))
                inode->i_mapping->a_ops = &ext4_da_aops;
        else
                inode->i_mapping->a_ops = &ext4_aops;
index db50686f509617a4ac719c5009ea99831d39785a..02237d4d91f5a3e459114516cf7797f72ddc1daf 100644 (file)
@@ -2424,12 +2424,12 @@ void f2fs_set_page_dirty_nobuffers(struct page *page)
        SetPageDirty(page);
        spin_unlock(&mapping->private_lock);
 
-       spin_lock_irqsave(&mapping->tree_lock, flags);
+       xa_lock_irqsave(&mapping->i_pages, flags);
        WARN_ON_ONCE(!PageUptodate(page));
        account_page_dirtied(page, mapping);
-       radix_tree_tag_set(&mapping->page_tree,
+       radix_tree_tag_set(&mapping->i_pages,
                        page_index(page), PAGECACHE_TAG_DIRTY);
-       spin_unlock_irqrestore(&mapping->tree_lock, flags);
+       xa_unlock_irqrestore(&mapping->i_pages, flags);
        unlock_page_memcg(page);
 
        __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
index fe661274ff1064e0ed65ddeae4e68079a207e304..8c9c2f31b253cffdacb95984f38513b5cd775ecc 100644 (file)
@@ -732,10 +732,10 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
 
        if (bit_pos == NR_DENTRY_IN_BLOCK &&
                        !truncate_hole(dir, page->index, page->index + 1)) {
-               spin_lock_irqsave(&mapping->tree_lock, flags);
-               radix_tree_tag_clear(&mapping->page_tree, page_index(page),
+               xa_lock_irqsave(&mapping->i_pages, flags);
+               radix_tree_tag_clear(&mapping->i_pages, page_index(page),
                                     PAGECACHE_TAG_DIRTY);
-               spin_unlock_irqrestore(&mapping->tree_lock, flags);
+               xa_unlock_irqrestore(&mapping->i_pages, flags);
 
                clear_page_dirty_for_io(page);
                ClearPagePrivate(page);
index bfb7a4a3a929793eae8b7c39dc71ac275e0776bf..9327411fd93b232bb18fc515d06e6122c7a608fa 100644 (file)
@@ -1015,7 +1015,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
        unsigned int init_segno = segno;
        struct gc_inode_list gc_list = {
                .ilist = LIST_HEAD_INIT(gc_list.ilist),
-               .iroot = RADIX_TREE_INIT(GFP_NOFS),
+               .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
        };
 
        trace_f2fs_gc_begin(sbi->sb, sync, background,
index 3b77d642121817d3bf81f617fba6d7ae2275ef36..265da200daa8b161e5abe466c992a03629c109ba 100644 (file)
@@ -226,10 +226,10 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
        kunmap_atomic(src_addr);
        set_page_dirty(dn.inode_page);
 
-       spin_lock_irqsave(&mapping->tree_lock, flags);
-       radix_tree_tag_clear(&mapping->page_tree, page_index(page),
+       xa_lock_irqsave(&mapping->i_pages, flags);
+       radix_tree_tag_clear(&mapping->i_pages, page_index(page),
                             PAGECACHE_TAG_DIRTY);
-       spin_unlock_irqrestore(&mapping->tree_lock, flags);
+       xa_unlock_irqrestore(&mapping->i_pages, flags);
 
        set_inode_flag(inode, FI_APPEND_WRITE);
        set_inode_flag(inode, FI_DATA_EXIST);
index 9a99243054ba28d4906638e926193b7930d2a31d..f202398e20eaaec752c9f5736b047748cee39c65 100644 (file)
@@ -91,11 +91,11 @@ static void clear_node_page_dirty(struct page *page)
        unsigned int long flags;
 
        if (PageDirty(page)) {
-               spin_lock_irqsave(&mapping->tree_lock, flags);
-               radix_tree_tag_clear(&mapping->page_tree,
+               xa_lock_irqsave(&mapping->i_pages, flags);
+               radix_tree_tag_clear(&mapping->i_pages,
                                page_index(page),
                                PAGECACHE_TAG_DIRTY);
-               spin_unlock_irqrestore(&mapping->tree_lock, flags);
+               xa_unlock_irqrestore(&mapping->i_pages, flags);
 
                clear_page_dirty_for_io(page);
                dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
@@ -1161,7 +1161,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
        f2fs_bug_on(sbi, check_nid_range(sbi, nid));
 
        rcu_read_lock();
-       apage = radix_tree_lookup(&NODE_MAPPING(sbi)->page_tree, nid);
+       apage = radix_tree_lookup(&NODE_MAPPING(sbi)->i_pages, nid);
        rcu_read_unlock();
        if (apage)
                return;
index 1280f915079b4724eaf8494e06d00de497a93e4e..4b12ba70a895d8a359692f63459860f2e1dcd24c 100644 (file)
@@ -347,9 +347,9 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
         * By the time control reaches here, RCU grace period has passed
         * since I_WB_SWITCH assertion and all wb stat update transactions
         * between unlocked_inode_to_wb_begin/end() are guaranteed to be
-        * synchronizing against mapping->tree_lock.
+        * synchronizing against the i_pages lock.
         *
-        * Grabbing old_wb->list_lock, inode->i_lock and mapping->tree_lock
+        * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
         * gives us exclusion against all wb related operations on @inode
         * including IO list manipulations and stat updates.
         */
@@ -361,7 +361,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
                spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
        }
        spin_lock(&inode->i_lock);
-       spin_lock_irq(&mapping->tree_lock);
+       xa_lock_irq(&mapping->i_pages);
 
        /*
         * Once I_FREEING is visible under i_lock, the eviction path owns
@@ -373,22 +373,22 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
        /*
         * Count and transfer stats.  Note that PAGECACHE_TAG_DIRTY points
         * to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to
-        * pages actually under underwriteback.
+        * pages actually under writeback.
         */
-       radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, 0,
+       radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
                                   PAGECACHE_TAG_DIRTY) {
                struct page *page = radix_tree_deref_slot_protected(slot,
-                                                       &mapping->tree_lock);
+                                               &mapping->i_pages.xa_lock);
                if (likely(page) && PageDirty(page)) {
                        dec_wb_stat(old_wb, WB_RECLAIMABLE);
                        inc_wb_stat(new_wb, WB_RECLAIMABLE);
                }
        }
 
-       radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, 0,
+       radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
                                   PAGECACHE_TAG_WRITEBACK) {
                struct page *page = radix_tree_deref_slot_protected(slot,
-                                                       &mapping->tree_lock);
+                                               &mapping->i_pages.xa_lock);
                if (likely(page)) {
                        WARN_ON_ONCE(!PageWriteback(page));
                        dec_wb_stat(old_wb, WB_WRITEBACK);
@@ -430,7 +430,7 @@ skip_switch:
         */
        smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH);
 
-       spin_unlock_irq(&mapping->tree_lock);
+       xa_unlock_irq(&mapping->i_pages);
        spin_unlock(&inode->i_lock);
        spin_unlock(&new_wb->list_lock);
        spin_unlock(&old_wb->list_lock);
@@ -506,8 +506,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
 
        /*
         * In addition to synchronizing among switchers, I_WB_SWITCH tells
-        * the RCU protected stat update paths to grab the mapping's
-        * tree_lock so that stat transfer can synchronize against them.
+        * the RCU protected stat update paths to grab the i_pages
+        * lock so that stat transfer can synchronize against them.
         * Let's continue after I_WB_SWITCH is guaranteed to be visible.
         */
        call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
index 7dc55b93a8304e8305c16a35ab48961083d28dd5..97137d7ec5ee8bfe21796abde94144d726785d29 100644 (file)
@@ -832,7 +832,7 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie,
        /* Clear pointers back to the netfs */
        cookie->netfs_data      = NULL;
        cookie->def             = NULL;
-       BUG_ON(cookie->stores.rnode);
+       BUG_ON(!radix_tree_empty(&cookie->stores));
 
        if (cookie->parent) {
                ASSERTCMP(atomic_read(&cookie->parent->usage), >, 0);
index 1085ca12e25c761d8b09d86b73e30b59d0e3eff7..20e0d0a4dc8cba917ef354e93f1aafe8bfee46a8 100644 (file)
@@ -973,7 +973,7 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj
         * retire the object instead.
         */
        if (!fscache_use_cookie(object)) {
-               ASSERT(object->cookie->stores.rnode == NULL);
+               ASSERT(radix_tree_empty(&object->cookie->stores));
                set_bit(FSCACHE_OBJECT_RETIRED, &object->flags);
                _leave(" [no cookie]");
                return transit_to(KILL_OBJECT);
index b153aeaa61ea5fd37c6b9fa7261e5c56967c2de1..13ceb98c3bd3b344b0fe47118af1122379c58194 100644 (file)
@@ -348,8 +348,7 @@ EXPORT_SYMBOL(inc_nlink);
 
 static void __address_space_init_once(struct address_space *mapping)
 {
-       INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC | __GFP_ACCOUNT);
-       spin_lock_init(&mapping->tree_lock);
+       INIT_RADIX_TREE(&mapping->i_pages, GFP_ATOMIC | __GFP_ACCOUNT);
        init_rwsem(&mapping->i_mmap_rwsem);
        INIT_LIST_HEAD(&mapping->private_list);
        spin_lock_init(&mapping->private_lock);
@@ -504,14 +503,14 @@ EXPORT_SYMBOL(__remove_inode_hash);
 void clear_inode(struct inode *inode)
 {
        /*
-        * We have to cycle tree_lock here because reclaim can be still in the
+        * We have to cycle the i_pages lock here because reclaim can be in the
         * process of removing the last page (in __delete_from_page_cache())
-        * and we must not free mapping under it.
+        * and we must not free the mapping under it.
         */
-       spin_lock_irq(&inode->i_data.tree_lock);
+       xa_lock_irq(&inode->i_data.i_pages);
        BUG_ON(inode->i_data.nrpages);
        BUG_ON(inode->i_data.nrexceptional);
-       spin_unlock_irq(&inode->i_data.tree_lock);
+       xa_unlock_irq(&inode->i_data.i_pages);
        BUG_ON(!list_empty(&inode->i_data.private_list));
        BUG_ON(!(inode->i_state & I_FREEING));
        BUG_ON(inode->i_state & I_CLEAR);
index 7ff3cb904acdf8042c7c169dc9816764f0319629..0fb590d79f30ed78d0626fd79f24cacf9e1a72b8 100644 (file)
@@ -1060,6 +1060,45 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 }
 EXPORT_SYMBOL(noop_fsync);
 
+int noop_set_page_dirty(struct page *page)
+{
+       /*
+        * Unlike __set_page_dirty_no_writeback that handles dirty page
+        * tracking in the page object, dax does all dirty tracking in
+        * the inode address_space in response to mkwrite faults. In the
+        * dax case we only need to worry about potentially dirty CPU
+        * caches, not dirty page cache pages to write back.
+        *
+        * This callback is defined to prevent fallback to
+        * __set_page_dirty_buffers() in set_page_dirty().
+        */
+       return 0;
+}
+EXPORT_SYMBOL_GPL(noop_set_page_dirty);
+
+void noop_invalidatepage(struct page *page, unsigned int offset,
+               unsigned int length)
+{
+       /*
+        * There is no page cache to invalidate in the dax case, however
+        * we need this callback defined to prevent falling back to
+        * block_invalidatepage() in do_invalidatepage().
+        */
+}
+EXPORT_SYMBOL_GPL(noop_invalidatepage);
+
+ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+{
+       /*
+        * iomap based filesystems support direct I/O without need for
+        * this callback. However, it still needs to be set in
+        * inode->a_ops so that open/fcntl know that direct I/O is
+        * generally supported.
+        */
+       return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(noop_direct_IO);
+
 /* Because kfree isn't assignment-compatible with void(void*) ;-/ */
 void kfree_link(void *p)
 {
index c21e0b4454a6762a2d6bbbed84dd03858afa2185..dec98cab729dd90fdb491e5ac770c8bf99867aec 100644 (file)
@@ -193,9 +193,9 @@ retry:
                                       (unsigned long long)oldkey,
                                       (unsigned long long)newkey);
 
-               spin_lock_irq(&btnc->tree_lock);
-               err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page);
-               spin_unlock_irq(&btnc->tree_lock);
+               xa_lock_irq(&btnc->i_pages);
+               err = radix_tree_insert(&btnc->i_pages, newkey, obh->b_page);
+               xa_unlock_irq(&btnc->i_pages);
                /*
                 * Note: page->index will not change to newkey until
                 * nilfs_btnode_commit_change_key() will be called.
@@ -251,11 +251,11 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
                                       (unsigned long long)newkey);
                mark_buffer_dirty(obh);
 
-               spin_lock_irq(&btnc->tree_lock);
-               radix_tree_delete(&btnc->page_tree, oldkey);
-               radix_tree_tag_set(&btnc->page_tree, newkey,
+               xa_lock_irq(&btnc->i_pages);
+               radix_tree_delete(&btnc->i_pages, oldkey);
+               radix_tree_tag_set(&btnc->i_pages, newkey,
                                   PAGECACHE_TAG_DIRTY);
-               spin_unlock_irq(&btnc->tree_lock);
+               xa_unlock_irq(&btnc->i_pages);
 
                opage->index = obh->b_blocknr = newkey;
                unlock_page(opage);
@@ -283,9 +283,9 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc,
                return;
 
        if (nbh == NULL) {      /* blocksize == pagesize */
-               spin_lock_irq(&btnc->tree_lock);
-               radix_tree_delete(&btnc->page_tree, newkey);
-               spin_unlock_irq(&btnc->tree_lock);
+               xa_lock_irq(&btnc->i_pages);
+               radix_tree_delete(&btnc->i_pages, newkey);
+               xa_unlock_irq(&btnc->i_pages);
                unlock_page(ctxt->bh->b_page);
        } else
                brelse(nbh);
index 68241512d7c179bfcf86d90559e5e03a85a814b1..4cb850a6f1c2c64794449e922c6e40f69aa585b6 100644 (file)
@@ -331,15 +331,15 @@ repeat:
                        struct page *page2;
 
                        /* move the page to the destination cache */
-                       spin_lock_irq(&smap->tree_lock);
-                       page2 = radix_tree_delete(&smap->page_tree, offset);
+                       xa_lock_irq(&smap->i_pages);
+                       page2 = radix_tree_delete(&smap->i_pages, offset);
                        WARN_ON(page2 != page);
 
                        smap->nrpages--;
-                       spin_unlock_irq(&smap->tree_lock);
+                       xa_unlock_irq(&smap->i_pages);
 
-                       spin_lock_irq(&dmap->tree_lock);
-                       err = radix_tree_insert(&dmap->page_tree, offset, page);
+                       xa_lock_irq(&dmap->i_pages);
+                       err = radix_tree_insert(&dmap->i_pages, offset, page);
                        if (unlikely(err < 0)) {
                                WARN_ON(err == -EEXIST);
                                page->mapping = NULL;
@@ -348,11 +348,11 @@ repeat:
                                page->mapping = dmap;
                                dmap->nrpages++;
                                if (PageDirty(page))
-                                       radix_tree_tag_set(&dmap->page_tree,
+                                       radix_tree_tag_set(&dmap->i_pages,
                                                           offset,
                                                           PAGECACHE_TAG_DIRTY);
                        }
-                       spin_unlock_irq(&dmap->tree_lock);
+                       xa_unlock_irq(&dmap->i_pages);
                }
                unlock_page(page);
        }
@@ -474,15 +474,15 @@ int __nilfs_clear_page_dirty(struct page *page)
        struct address_space *mapping = page->mapping;
 
        if (mapping) {
-               spin_lock_irq(&mapping->tree_lock);
+               xa_lock_irq(&mapping->i_pages);
                if (test_bit(PG_dirty, &page->flags)) {
-                       radix_tree_tag_clear(&mapping->page_tree,
+                       radix_tree_tag_clear(&mapping->i_pages,
                                             page_index(page),
                                             PAGECACHE_TAG_DIRTY);
-                       spin_unlock_irq(&mapping->tree_lock);
+                       xa_unlock_irq(&mapping->i_pages);
                        return clear_page_dirty_for_io(page);
                }
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
                return 0;
        }
        return TestClearPageDirty(page);
index 598803576e4c04445b56bb5e7f4ae564c764f7a3..ae2c807fd719540312d18a94c7676ceb51b10f12 100644 (file)
@@ -141,25 +141,12 @@ static inline const char *get_task_state(struct task_struct *tsk)
        return task_state_array[task_state_index(tsk)];
 }
 
-static inline int get_task_umask(struct task_struct *tsk)
-{
-       struct fs_struct *fs;
-       int umask = -ENOENT;
-
-       task_lock(tsk);
-       fs = tsk->fs;
-       if (fs)
-               umask = fs->umask;
-       task_unlock(tsk);
-       return umask;
-}
-
 static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
                                struct pid *pid, struct task_struct *p)
 {
        struct user_namespace *user_ns = seq_user_ns(m);
        struct group_info *group_info;
-       int g, umask;
+       int g, umask = -1;
        struct task_struct *tracer;
        const struct cred *cred;
        pid_t ppid, tpid = 0, tgid, ngid;
@@ -177,17 +164,18 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
        ngid = task_numa_group_id(p);
        cred = get_task_cred(p);
 
-       umask = get_task_umask(p);
-       if (umask >= 0)
-               seq_printf(m, "Umask:\t%#04o\n", umask);
-
        task_lock(p);
+       if (p->fs)
+               umask = p->fs->umask;
        if (p->files)
                max_fds = files_fdtable(p->files)->max_fds;
        task_unlock(p);
        rcu_read_unlock();
 
-       seq_printf(m, "State:\t%s", get_task_state(p));
+       if (umask >= 0)
+               seq_printf(m, "Umask:\t%#04o\n", umask);
+       seq_puts(m, "State:\t");
+       seq_puts(m, get_task_state(p));
 
        seq_put_decimal_ull(m, "\nTgid:\t", tgid);
        seq_put_decimal_ull(m, "\nNgid:\t", ngid);
@@ -313,8 +301,8 @@ static void render_cap_t(struct seq_file *m, const char *header,
 
        seq_puts(m, header);
        CAP_FOR_EACH_U32(__capi) {
-               seq_printf(m, "%08x",
-                          a->cap[CAP_LAST_U32 - __capi]);
+               seq_put_hex_ll(m, NULL,
+                          a->cap[CAP_LAST_U32 - __capi], 8);
        }
        seq_putc(m, '\n');
 }
@@ -368,7 +356,8 @@ static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
 
 static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
 {
-       seq_printf(m, "CoreDumping:\t%d\n", !!mm->core_state);
+       seq_put_decimal_ull(m, "CoreDumping:\t", !!mm->core_state);
+       seq_putc(m, '\n');
 }
 
 int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
@@ -504,7 +493,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
        /* convert nsec -> ticks */
        start_time = nsec_to_clock_t(task->real_start_time);
 
-       seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
+       seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns));
+       seq_puts(m, " (");
+       seq_puts(m, tcomm);
+       seq_puts(m, ") ");
+       seq_putc(m, state);
        seq_put_decimal_ll(m, " ", ppid);
        seq_put_decimal_ll(m, " ", pgid);
        seq_put_decimal_ll(m, " ", sid);
index d53246863cfbdea26fecae460d44192b67bbd61e..eafa39a3a88cb479eaca9d164e05cc4dbca1db70 100644 (file)
@@ -388,14 +388,17 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
        unsigned long wchan;
        char symname[KSYM_NAME_LEN];
 
-       wchan = get_wchan(task);
+       if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
+               goto print0;
 
-       if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)
-                       && !lookup_symbol_name(wchan, symname))
-               seq_printf(m, "%s", symname);
-       else
-               seq_putc(m, '0');
+       wchan = get_wchan(task);
+       if (wchan && !lookup_symbol_name(wchan, symname)) {
+               seq_puts(m, symname);
+               return 0;
+       }
 
+print0:
+       seq_putc(m, '0');
        return 0;
 }
 #endif /* CONFIG_KALLSYMS */
@@ -1910,6 +1913,8 @@ static int dname_to_vma_addr(struct dentry *dentry,
        unsigned long long sval, eval;
        unsigned int len;
 
+       if (str[0] == '0' && str[1] != '-')
+               return -EINVAL;
        len = _parse_integer(str, 16, &sval);
        if (len & KSTRTOX_OVERFLOW)
                return -EINVAL;
@@ -1921,6 +1926,8 @@ static int dname_to_vma_addr(struct dentry *dentry,
                return -EINVAL;
        str++;
 
+       if (str[0] == '0' && str[1])
+               return -EINVAL;
        len = _parse_integer(str, 16, &eval);
        if (len & KSTRTOX_OVERFLOW)
                return -EINVAL;
@@ -2204,6 +2211,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
                }
        }
        up_read(&mm->mmap_sem);
+       mmput(mm);
 
        for (i = 0; i < nr_files; i++) {
                char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */
@@ -2221,7 +2229,6 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
        }
        if (fa)
                flex_array_free(fa);
-       mmput(mm);
 
 out_put_task:
        put_task_struct(task);
index 403cbb12a6e9b428fb56f0767492a0dd3acde536..8233e7af9389dc8316e71a4f42e0206a6ceb5e9b 100644 (file)
@@ -6,7 +6,8 @@
 
 static int cmdline_proc_show(struct seq_file *m, void *v)
 {
-       seq_printf(m, "%s\n", saved_command_line);
+       seq_puts(m, saved_command_line);
+       seq_putc(m, '\n');
        return 0;
 }
 
index 5d709fa8f3a2c5d9c9f18a9baa1600aef5077636..04c4804cbdef89bb219573fc3db18b9d6a02a149 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright (C) 1997 Theodore Ts'o
  */
 
+#include <linux/cache.h>
 #include <linux/errno.h>
 #include <linux/time.h>
 #include <linux/proc_fs.h>
 
 static DEFINE_RWLOCK(proc_subdir_lock);
 
+struct kmem_cache *proc_dir_entry_cache __ro_after_init;
+
+void pde_free(struct proc_dir_entry *pde)
+{
+       if (S_ISLNK(pde->mode))
+               kfree(pde->data);
+       if (pde->name != pde->inline_name)
+               kfree(pde->name);
+       kmem_cache_free(proc_dir_entry_cache, pde);
+}
+
 static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int len)
 {
        if (len < de->namelen)
@@ -40,8 +52,8 @@ static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int
 
 static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir)
 {
-       return rb_entry_safe(rb_first_cached(&dir->subdir),
-                            struct proc_dir_entry, subdir_node);
+       return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry,
+                            subdir_node);
 }
 
 static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir)
@@ -54,7 +66,7 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
                                              const char *name,
                                              unsigned int len)
 {
-       struct rb_node *node = dir->subdir.rb_root.rb_node;
+       struct rb_node *node = dir->subdir.rb_node;
 
        while (node) {
                struct proc_dir_entry *de = rb_entry(node,
@@ -75,9 +87,8 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
 static bool pde_subdir_insert(struct proc_dir_entry *dir,
                              struct proc_dir_entry *de)
 {
-       struct rb_root_cached *root = &dir->subdir;
-       struct rb_node **new = &root->rb_root.rb_node, *parent = NULL;
-       bool leftmost = true;
+       struct rb_root *root = &dir->subdir;
+       struct rb_node **new = &root->rb_node, *parent = NULL;
 
        /* Figure out where to put new node */
        while (*new) {
@@ -89,16 +100,15 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir,
                parent = *new;
                if (result < 0)
                        new = &(*new)->rb_left;
-               else if (result > 0) {
+               else if (result > 0)
                        new = &(*new)->rb_right;
-                       leftmost = false;
-               } else
+               else
                        return false;
        }
 
        /* Add new node and rebalance tree. */
        rb_link_node(&de->subdir_node, parent, new);
-       rb_insert_color_cached(&de->subdir_node, root, leftmost);
+       rb_insert_color(&de->subdir_node, root);
        return true;
 }
 
@@ -354,6 +364,14 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
                WARN(1, "name len %u\n", qstr.len);
                return NULL;
        }
+       if (qstr.len == 1 && fn[0] == '.') {
+               WARN(1, "name '.'\n");
+               return NULL;
+       }
+       if (qstr.len == 2 && fn[0] == '.' && fn[1] == '.') {
+               WARN(1, "name '..'\n");
+               return NULL;
+       }
        if (*parent == &proc_root && name_to_int(&qstr) != ~0U) {
                WARN(1, "create '/proc/%s' by hand\n", qstr.name);
                return NULL;
@@ -363,16 +381,26 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
                return NULL;
        }
 
-       ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL);
+       ent = kmem_cache_zalloc(proc_dir_entry_cache, GFP_KERNEL);
        if (!ent)
                goto out;
 
+       if (qstr.len + 1 <= sizeof(ent->inline_name)) {
+               ent->name = ent->inline_name;
+       } else {
+               ent->name = kmalloc(qstr.len + 1, GFP_KERNEL);
+               if (!ent->name) {
+                       pde_free(ent);
+                       return NULL;
+               }
+       }
+
        memcpy(ent->name, fn, qstr.len + 1);
        ent->namelen = qstr.len;
        ent->mode = mode;
        ent->nlink = nlink;
-       ent->subdir = RB_ROOT_CACHED;
-       atomic_set(&ent->count, 1);
+       ent->subdir = RB_ROOT;
+       refcount_set(&ent->refcnt, 1);
        spin_lock_init(&ent->pde_unload_lock);
        INIT_LIST_HEAD(&ent->pde_openers);
        proc_set_user(ent, (*parent)->uid, (*parent)->gid);
@@ -395,12 +423,11 @@ struct proc_dir_entry *proc_symlink(const char *name,
                        strcpy((char*)ent->data,dest);
                        ent->proc_iops = &proc_link_inode_operations;
                        if (proc_register(parent, ent) < 0) {
-                               kfree(ent->data);
-                               kfree(ent);
+                               pde_free(ent);
                                ent = NULL;
                        }
                } else {
-                       kfree(ent);
+                       pde_free(ent);
                        ent = NULL;
                }
        }
@@ -423,7 +450,7 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode,
                ent->proc_iops = &proc_dir_inode_operations;
                parent->nlink++;
                if (proc_register(parent, ent) < 0) {
-                       kfree(ent);
+                       pde_free(ent);
                        parent->nlink--;
                        ent = NULL;
                }
@@ -458,7 +485,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name)
                ent->proc_iops = NULL;
                parent->nlink++;
                if (proc_register(parent, ent) < 0) {
-                       kfree(ent);
+                       pde_free(ent);
                        parent->nlink--;
                        ent = NULL;
                }
@@ -495,7 +522,7 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
                goto out_free;
        return pde;
 out_free:
-       kfree(pde);
+       pde_free(pde);
 out:
        return NULL;
 }
@@ -522,19 +549,12 @@ void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid)
 }
 EXPORT_SYMBOL(proc_set_user);
 
-static void free_proc_entry(struct proc_dir_entry *de)
-{
-       proc_free_inum(de->low_ino);
-
-       if (S_ISLNK(de->mode))
-               kfree(de->data);
-       kfree(de);
-}
-
 void pde_put(struct proc_dir_entry *pde)
 {
-       if (atomic_dec_and_test(&pde->count))
-               free_proc_entry(pde);
+       if (refcount_dec_and_test(&pde->refcnt)) {
+               proc_free_inum(pde->low_ino);
+               pde_free(pde);
+       }
 }
 
 /*
@@ -555,7 +575,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 
        de = pde_subdir_find(parent, fn, len);
        if (de)
-               rb_erase_cached(&de->subdir_node, &parent->subdir);
+               rb_erase(&de->subdir_node, &parent->subdir);
        write_unlock(&proc_subdir_lock);
        if (!de) {
                WARN(1, "name '%s'\n", name);
@@ -592,13 +612,13 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
                write_unlock(&proc_subdir_lock);
                return -ENOENT;
        }
-       rb_erase_cached(&root->subdir_node, &parent->subdir);
+       rb_erase(&root->subdir_node, &parent->subdir);
 
        de = root;
        while (1) {
                next = pde_subdir_first(de);
                if (next) {
-                       rb_erase_cached(&next->subdir_node, &de->subdir);
+                       rb_erase(&next->subdir_node, &de->subdir);
                        de = next;
                        continue;
                }
index 6e8724958116c56dc97dd05414269622b330e58d..2cf3b74391ca5774a04cabe2b70e1e84ec7b3002 100644 (file)
@@ -54,6 +54,7 @@ static void proc_evict_inode(struct inode *inode)
 }
 
 static struct kmem_cache *proc_inode_cachep __ro_after_init;
+static struct kmem_cache *pde_opener_cache __ro_after_init;
 
 static struct inode *proc_alloc_inode(struct super_block *sb)
 {
@@ -92,7 +93,7 @@ static void init_once(void *foo)
        inode_init_once(&ei->vfs_inode);
 }
 
-void __init proc_init_inodecache(void)
+void __init proc_init_kmemcache(void)
 {
        proc_inode_cachep = kmem_cache_create("proc_inode_cache",
                                             sizeof(struct proc_inode),
@@ -100,6 +101,13 @@ void __init proc_init_inodecache(void)
                                                SLAB_MEM_SPREAD|SLAB_ACCOUNT|
                                                SLAB_PANIC),
                                             init_once);
+       pde_opener_cache =
+               kmem_cache_create("pde_opener", sizeof(struct pde_opener), 0,
+                                 SLAB_ACCOUNT|SLAB_PANIC, NULL);
+       proc_dir_entry_cache = kmem_cache_create_usercopy(
+               "proc_dir_entry", sizeof(struct proc_dir_entry), 0, SLAB_PANIC,
+               offsetof(struct proc_dir_entry, inline_name),
+               sizeof_field(struct proc_dir_entry, inline_name), NULL);
 }
 
 static int proc_show_options(struct seq_file *seq, struct dentry *root)
@@ -138,7 +146,7 @@ static void unuse_pde(struct proc_dir_entry *pde)
                complete(pde->pde_unload_completion);
 }
 
-/* pde is locked */
+/* pde is locked on entry, unlocked on exit */
 static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
 {
        /*
@@ -157,9 +165,10 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
                pdeo->c = &c;
                spin_unlock(&pde->pde_unload_lock);
                wait_for_completion(&c);
-               spin_lock(&pde->pde_unload_lock);
        } else {
                struct file *file;
+               struct completion *c;
+
                pdeo->closing = true;
                spin_unlock(&pde->pde_unload_lock);
                file = pdeo->file;
@@ -167,9 +176,11 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
                spin_lock(&pde->pde_unload_lock);
                /* After ->release. */
                list_del(&pdeo->lh);
-               if (unlikely(pdeo->c))
-                       complete(pdeo->c);
-               kfree(pdeo);
+               c = pdeo->c;
+               spin_unlock(&pde->pde_unload_lock);
+               if (unlikely(c))
+                       complete(c);
+               kmem_cache_free(pde_opener_cache, pdeo);
        }
 }
 
@@ -188,6 +199,7 @@ void proc_entry_rundown(struct proc_dir_entry *de)
                struct pde_opener *pdeo;
                pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
                close_pdeo(de, pdeo);
+               spin_lock(&de->pde_unload_lock);
        }
        spin_unlock(&de->pde_unload_lock);
 }
@@ -338,31 +350,36 @@ static int proc_reg_open(struct inode *inode, struct file *file)
         *
         * Save every "struct file" with custom ->release hook.
         */
-       pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
-       if (!pdeo)
-               return -ENOMEM;
-
-       if (!use_pde(pde)) {
-               kfree(pdeo);
+       if (!use_pde(pde))
                return -ENOENT;
-       }
-       open = pde->proc_fops->open;
+
        release = pde->proc_fops->release;
+       if (release) {
+               pdeo = kmem_cache_alloc(pde_opener_cache, GFP_KERNEL);
+               if (!pdeo) {
+                       rv = -ENOMEM;
+                       goto out_unuse;
+               }
+       }
 
+       open = pde->proc_fops->open;
        if (open)
                rv = open(inode, file);
 
-       if (rv == 0 && release) {
-               /* To know what to release. */
-               pdeo->file = file;
-               pdeo->closing = false;
-               pdeo->c = NULL;
-               spin_lock(&pde->pde_unload_lock);
-               list_add(&pdeo->lh, &pde->pde_openers);
-               spin_unlock(&pde->pde_unload_lock);
-       } else
-               kfree(pdeo);
+       if (release) {
+               if (rv == 0) {
+                       /* To know what to release. */
+                       pdeo->file = file;
+                       pdeo->closing = false;
+                       pdeo->c = NULL;
+                       spin_lock(&pde->pde_unload_lock);
+                       list_add(&pdeo->lh, &pde->pde_openers);
+                       spin_unlock(&pde->pde_unload_lock);
+               } else
+                       kmem_cache_free(pde_opener_cache, pdeo);
+       }
 
+out_unuse:
        unuse_pde(pde);
        return rv;
 }
@@ -375,7 +392,7 @@ static int proc_reg_release(struct inode *inode, struct file *file)
        list_for_each_entry(pdeo, &pde->pde_openers, lh) {
                if (pdeo->file == file) {
                        close_pdeo(pde, pdeo);
-                       break;
+                       return 0;
                }
        }
        spin_unlock(&pde->pde_unload_lock);
index d697c8ab0a140d5485ddb4968df314a3bb99ff67..0f1692e63cb62324f69a64100daae0bdfc383784 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <linux/proc_fs.h>
 #include <linux/proc_ns.h>
+#include <linux/refcount.h>
 #include <linux/spinlock.h>
 #include <linux/atomic.h>
 #include <linux/binfmts.h>
@@ -36,7 +37,7 @@ struct proc_dir_entry {
         * negative -> it's going away RSN
         */
        atomic_t in_use;
-       atomic_t count;         /* use count */
+       refcount_t refcnt;
        struct list_head pde_openers;   /* who did ->open, but not ->release */
        /* protects ->pde_openers and all struct pde_opener instances */
        spinlock_t pde_unload_lock;
@@ -50,13 +51,22 @@ struct proc_dir_entry {
        kgid_t gid;
        loff_t size;
        struct proc_dir_entry *parent;
-       struct rb_root_cached subdir;
+       struct rb_root subdir;
        struct rb_node subdir_node;
+       char *name;
        umode_t mode;
        u8 namelen;
-       char name[];
+#ifdef CONFIG_64BIT
+#define SIZEOF_PDE_INLINE_NAME (192-139)
+#else
+#define SIZEOF_PDE_INLINE_NAME (128-87)
+#endif
+       char inline_name[SIZEOF_PDE_INLINE_NAME];
 } __randomize_layout;
 
+extern struct kmem_cache *proc_dir_entry_cache;
+void pde_free(struct proc_dir_entry *pde);
+
 union proc_op {
        int (*proc_get_link)(struct dentry *, struct path *);
        int (*proc_show)(struct seq_file *m,
@@ -159,7 +169,7 @@ int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry *
 
 static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
 {
-       atomic_inc(&pde->count);
+       refcount_inc(&pde->refcnt);
        return pde;
 }
 extern void pde_put(struct proc_dir_entry *);
@@ -177,12 +187,12 @@ struct pde_opener {
        struct list_head lh;
        bool closing;
        struct completion *c;
-};
+} __randomize_layout;
 extern const struct inode_operations proc_link_inode_operations;
 
 extern const struct inode_operations proc_pid_link_inode_operations;
 
-extern void proc_init_inodecache(void);
+void proc_init_kmemcache(void);
 void set_proc_pid_nlink(void);
 extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
 extern int proc_fill_super(struct super_block *, void *data, int flags);
index 6bb20f8642590f72163adc591831754f8b34a961..65a72ab5747169e6dab29b95bd57e460b583078b 100644 (file)
@@ -26,20 +26,7 @@ void __attribute__((weak)) arch_report_meminfo(struct seq_file *m)
 
 static void show_val_kb(struct seq_file *m, const char *s, unsigned long num)
 {
-       char v[32];
-       static const char blanks[7] = {' ', ' ', ' ', ' ',' ', ' ', ' '};
-       int len;
-
-       len = num_to_str(v, sizeof(v), num << (PAGE_SHIFT - 10));
-
-       seq_write(m, s, 16);
-
-       if (len > 0) {
-               if (len < 8)
-                       seq_write(m, blanks, 8 - len);
-
-               seq_write(m, v, len);
-       }
+       seq_put_decimal_ull_width(m, s, num << (PAGE_SHIFT - 10), 8);
        seq_write(m, " kB\n", 4);
 }
 
index 68c06ae7888c8cb5d07053863698b7a478d68d4e..1763f370489d204a4c7dcefe36ff163be1ec2a32 100644 (file)
@@ -192,15 +192,16 @@ static __net_init int proc_net_ns_init(struct net *net)
        int err;
 
        err = -ENOMEM;
-       netd = kzalloc(sizeof(*netd) + 4, GFP_KERNEL);
+       netd = kmem_cache_zalloc(proc_dir_entry_cache, GFP_KERNEL);
        if (!netd)
                goto out;
 
-       netd->subdir = RB_ROOT_CACHED;
+       netd->subdir = RB_ROOT;
        netd->data = net;
        netd->nlink = 2;
        netd->namelen = 3;
        netd->parent = &proc_root;
+       netd->name = netd->inline_name;
        memcpy(netd->name, "net", 4);
 
        uid = make_kuid(net->user_ns, 0);
@@ -223,7 +224,7 @@ static __net_init int proc_net_ns_init(struct net *net)
        return 0;
 
 free_net:
-       kfree(netd);
+       pde_free(netd);
 out:
        return err;
 }
@@ -231,7 +232,7 @@ out:
 static __net_exit void proc_net_ns_exit(struct net *net)
 {
        remove_proc_entry("stat", net->proc_net);
-       kfree(net->proc_net);
+       pde_free(net->proc_net);
 }
 
 static struct pernet_operations __net_initdata proc_net_ns_ops = {
index c41ab261397df2875951db47ea38b7cbe2a5bae1..8989936f29959a62a12e4b1c1e2d866f940a93a0 100644 (file)
@@ -707,14 +707,14 @@ static bool proc_sys_link_fill_cache(struct file *file,
                                    struct ctl_table *table)
 {
        bool ret = true;
+
        head = sysctl_head_grab(head);
+       if (IS_ERR(head))
+               return false;
 
-       if (S_ISLNK(table->mode)) {
-               /* It is not an error if we can not follow the link ignore it */
-               int err = sysctl_follow_link(&head, &table);
-               if (err)
-                       goto out;
-       }
+       /* It is not an error if we cannot follow the link; ignore it */
+       if (sysctl_follow_link(&head, &table))
+               goto out;
 
        ret = proc_sys_fill_cache(file, ctx, head, table);
 out:
@@ -1086,7 +1086,7 @@ static int sysctl_check_table_array(const char *path, struct ctl_table *table)
        if ((table->proc_handler == proc_douintvec) ||
            (table->proc_handler == proc_douintvec_minmax)) {
                if (table->maxlen != sizeof(unsigned int))
-                       err |= sysctl_err(path, table, "array now allowed");
+                       err |= sysctl_err(path, table, "array not allowed");
        }
 
        return err;
index ede8e64974be240368d11ab47f3227f72a016e23..61b7340b357a2aad54e6c9b5eca97b8c9bd7a425 100644 (file)
@@ -123,23 +123,13 @@ static struct file_system_type proc_fs_type = {
 
 void __init proc_root_init(void)
 {
-       int err;
-
-       proc_init_inodecache();
+       proc_init_kmemcache();
        set_proc_pid_nlink();
-       err = register_filesystem(&proc_fs_type);
-       if (err)
-               return;
-
        proc_self_init();
        proc_thread_self_init();
        proc_symlink("mounts", NULL, "self/mounts");
 
        proc_net_init();
-
-#ifdef CONFIG_SYSVIPC
-       proc_mkdir("sysvipc", NULL);
-#endif
        proc_mkdir("fs", NULL);
        proc_mkdir("driver", NULL);
        proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */
@@ -150,6 +140,8 @@ void __init proc_root_init(void)
        proc_tty_init();
        proc_mkdir("bus", NULL);
        proc_sys_init();
+
+       register_filesystem(&proc_fs_type);
 }
 
 static int proc_root_getattr(const struct path *path, struct kstat *stat,
@@ -207,12 +199,13 @@ struct proc_dir_entry proc_root = {
        .namelen        = 5, 
        .mode           = S_IFDIR | S_IRUGO | S_IXUGO, 
        .nlink          = 2, 
-       .count          = ATOMIC_INIT(1),
+       .refcnt         = REFCOUNT_INIT(1),
        .proc_iops      = &proc_root_inode_operations, 
        .proc_fops      = &proc_root_operations,
        .parent         = &proc_root,
-       .subdir         = RB_ROOT_CACHED,
-       .name           = "/proc",
+       .subdir         = RB_ROOT,
+       .name           = proc_root.inline_name,
+       .inline_name    = "/proc",
 };
 
 int pid_ns_prepare_proc(struct pid_namespace *ns)
index ec6d2983a5cb65e5c46a40c5b6a18da11db10749..65ae54659833888142d376835d9b4a8a70c7218e 100644 (file)
@@ -24,6 +24,8 @@
 #include <asm/tlbflush.h>
 #include "internal.h"
 
+#define SEQ_PUT_DEC(str, val) \
+               seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8)
 void task_mem(struct seq_file *m, struct mm_struct *mm)
 {
        unsigned long text, lib, swap, anon, file, shmem;
@@ -53,39 +55,28 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
        lib = (mm->exec_vm << PAGE_SHIFT) - text;
 
        swap = get_mm_counter(mm, MM_SWAPENTS);
-       seq_printf(m,
-               "VmPeak:\t%8lu kB\n"
-               "VmSize:\t%8lu kB\n"
-               "VmLck:\t%8lu kB\n"
-               "VmPin:\t%8lu kB\n"
-               "VmHWM:\t%8lu kB\n"
-               "VmRSS:\t%8lu kB\n"
-               "RssAnon:\t%8lu kB\n"
-               "RssFile:\t%8lu kB\n"
-               "RssShmem:\t%8lu kB\n"
-               "VmData:\t%8lu kB\n"
-               "VmStk:\t%8lu kB\n"
-               "VmExe:\t%8lu kB\n"
-               "VmLib:\t%8lu kB\n"
-               "VmPTE:\t%8lu kB\n"
-               "VmSwap:\t%8lu kB\n",
-               hiwater_vm << (PAGE_SHIFT-10),
-               total_vm << (PAGE_SHIFT-10),
-               mm->locked_vm << (PAGE_SHIFT-10),
-               mm->pinned_vm << (PAGE_SHIFT-10),
-               hiwater_rss << (PAGE_SHIFT-10),
-               total_rss << (PAGE_SHIFT-10),
-               anon << (PAGE_SHIFT-10),
-               file << (PAGE_SHIFT-10),
-               shmem << (PAGE_SHIFT-10),
-               mm->data_vm << (PAGE_SHIFT-10),
-               mm->stack_vm << (PAGE_SHIFT-10),
-               text >> 10,
-               lib >> 10,
-               mm_pgtables_bytes(mm) >> 10,
-               swap << (PAGE_SHIFT-10));
+       SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
+       SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
+       SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
+       SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm);
+       SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
+       SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
+       SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
+       SEQ_PUT_DEC(" kB\nRssFile:\t", file);
+       SEQ_PUT_DEC(" kB\nRssShmem:\t", shmem);
+       SEQ_PUT_DEC(" kB\nVmData:\t", mm->data_vm);
+       SEQ_PUT_DEC(" kB\nVmStk:\t", mm->stack_vm);
+       seq_put_decimal_ull_width(m,
+                   " kB\nVmExe:\t", text >> 10, 8);
+       seq_put_decimal_ull_width(m,
+                   " kB\nVmLib:\t", lib >> 10, 8);
+       seq_put_decimal_ull_width(m,
+                   " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8);
+       SEQ_PUT_DEC(" kB\nVmSwap:\t", swap);
+       seq_puts(m, " kB\n");
        hugetlb_report_usage(m, mm);
 }
+#undef SEQ_PUT_DEC
 
 unsigned long task_vsize(struct mm_struct *mm)
 {
@@ -287,15 +278,18 @@ static void show_vma_header_prefix(struct seq_file *m,
                                   dev_t dev, unsigned long ino)
 {
        seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
-       seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
-                  start,
-                  end,
-                  flags & VM_READ ? 'r' : '-',
-                  flags & VM_WRITE ? 'w' : '-',
-                  flags & VM_EXEC ? 'x' : '-',
-                  flags & VM_MAYSHARE ? 's' : 'p',
-                  pgoff,
-                  MAJOR(dev), MINOR(dev), ino);
+       seq_put_hex_ll(m, NULL, start, 8);
+       seq_put_hex_ll(m, "-", end, 8);
+       seq_putc(m, ' ');
+       seq_putc(m, flags & VM_READ ? 'r' : '-');
+       seq_putc(m, flags & VM_WRITE ? 'w' : '-');
+       seq_putc(m, flags & VM_EXEC ? 'x' : '-');
+       seq_putc(m, flags & VM_MAYSHARE ? 's' : 'p');
+       seq_put_hex_ll(m, " ", pgoff, 8);
+       seq_put_hex_ll(m, " ", MAJOR(dev), 2);
+       seq_put_hex_ll(m, ":", MINOR(dev), 2);
+       seq_put_decimal_ull(m, " ", ino);
+       seq_putc(m, ' ');
 }
 
 static void
@@ -694,8 +688,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
                if (!mnemonics[i][0])
                        continue;
                if (vma->vm_flags & (1UL << i)) {
-                       seq_printf(m, "%c%c ",
-                                  mnemonics[i][0], mnemonics[i][1]);
+                       seq_putc(m, mnemonics[i][0]);
+                       seq_putc(m, mnemonics[i][1]);
+                       seq_putc(m, ' ');
                }
        }
        seq_putc(m, '\n');
@@ -736,6 +731,8 @@ void __weak arch_show_smap(struct seq_file *m, struct vm_area_struct *vma)
 {
 }
 
+#define SEQ_PUT_DEC(str, val) \
+               seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
 static int show_smap(struct seq_file *m, void *v, int is_pid)
 {
        struct proc_maps_private *priv = m->private;
@@ -809,51 +806,34 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
                ret = SEQ_SKIP;
        }
 
-       if (!rollup_mode)
-               seq_printf(m,
-                          "Size:           %8lu kB\n"
-                          "KernelPageSize: %8lu kB\n"
-                          "MMUPageSize:    %8lu kB\n",
-                          (vma->vm_end - vma->vm_start) >> 10,
-                          vma_kernel_pagesize(vma) >> 10,
-                          vma_mmu_pagesize(vma) >> 10);
-
-
-       if (!rollup_mode || last_vma)
-               seq_printf(m,
-                          "Rss:            %8lu kB\n"
-                          "Pss:            %8lu kB\n"
-                          "Shared_Clean:   %8lu kB\n"
-                          "Shared_Dirty:   %8lu kB\n"
-                          "Private_Clean:  %8lu kB\n"
-                          "Private_Dirty:  %8lu kB\n"
-                          "Referenced:     %8lu kB\n"
-                          "Anonymous:      %8lu kB\n"
-                          "LazyFree:       %8lu kB\n"
-                          "AnonHugePages:  %8lu kB\n"
-                          "ShmemPmdMapped: %8lu kB\n"
-                          "Shared_Hugetlb: %8lu kB\n"
-                          "Private_Hugetlb: %7lu kB\n"
-                          "Swap:           %8lu kB\n"
-                          "SwapPss:        %8lu kB\n"
-                          "Locked:         %8lu kB\n",
-                          mss->resident >> 10,
-                          (unsigned long)(mss->pss >> (10 + PSS_SHIFT)),
-                          mss->shared_clean  >> 10,
-                          mss->shared_dirty  >> 10,
-                          mss->private_clean >> 10,
-                          mss->private_dirty >> 10,
-                          mss->referenced >> 10,
-                          mss->anonymous >> 10,
-                          mss->lazyfree >> 10,
-                          mss->anonymous_thp >> 10,
-                          mss->shmem_thp >> 10,
-                          mss->shared_hugetlb >> 10,
-                          mss->private_hugetlb >> 10,
-                          mss->swap >> 10,
-                          (unsigned long)(mss->swap_pss >> (10 + PSS_SHIFT)),
-                          (unsigned long)(mss->pss >> (10 + PSS_SHIFT)));
+       if (!rollup_mode) {
+               SEQ_PUT_DEC("Size:           ", vma->vm_end - vma->vm_start);
+               SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma));
+               SEQ_PUT_DEC(" kB\nMMUPageSize:    ", vma_mmu_pagesize(vma));
+               seq_puts(m, " kB\n");
+       }
 
+       if (!rollup_mode || last_vma) {
+               SEQ_PUT_DEC("Rss:            ", mss->resident);
+               SEQ_PUT_DEC(" kB\nPss:            ", mss->pss >> PSS_SHIFT);
+               SEQ_PUT_DEC(" kB\nShared_Clean:   ", mss->shared_clean);
+               SEQ_PUT_DEC(" kB\nShared_Dirty:   ", mss->shared_dirty);
+               SEQ_PUT_DEC(" kB\nPrivate_Clean:  ", mss->private_clean);
+               SEQ_PUT_DEC(" kB\nPrivate_Dirty:  ", mss->private_dirty);
+               SEQ_PUT_DEC(" kB\nReferenced:     ", mss->referenced);
+               SEQ_PUT_DEC(" kB\nAnonymous:      ", mss->anonymous);
+               SEQ_PUT_DEC(" kB\nLazyFree:       ", mss->lazyfree);
+               SEQ_PUT_DEC(" kB\nAnonHugePages:  ", mss->anonymous_thp);
+               SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
+               SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
+               seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
+                                         mss->private_hugetlb >> 10, 7);
+               SEQ_PUT_DEC(" kB\nSwap:           ", mss->swap);
+               SEQ_PUT_DEC(" kB\nSwapPss:        ",
+                                               mss->swap_pss >> PSS_SHIFT);
+               SEQ_PUT_DEC(" kB\nLocked:         ", mss->pss >> PSS_SHIFT);
+               seq_puts(m, " kB\n");
+       }
        if (!rollup_mode) {
                arch_show_smap(m, vma);
                show_smap_vma_flags(m, vma);
@@ -861,6 +841,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
        m_cache_vma(m, vma);
        return ret;
 }
+#undef SEQ_PUT_DEC
 
 static int show_pid_smap(struct seq_file *m, void *v)
 {
index 70057359fbaf3b7d98c8109dd0bcba1fe112b5cb..23148c3ed67560fa97a10e5ef36e1024ad95aee7 100644 (file)
@@ -2643,7 +2643,7 @@ static int journal_init_dev(struct super_block *super,
        if (IS_ERR(journal->j_dev_bd)) {
                result = PTR_ERR(journal->j_dev_bd);
                journal->j_dev_bd = NULL;
-               reiserfs_warning(super,
+               reiserfs_warning(super, "sh-457",
                                 "journal_init_dev: Cannot open '%s': %i",
                                 jdev_name, result);
                return result;
index eea09f6d883056b8e6fcc0aad857ab1a76fdb3f1..c6c27f1f9c9850634700e4898adae6ab7755e113 100644 (file)
@@ -6,6 +6,7 @@
  * initial implementation -- AV, Oct 2001.
  */
 
+#include <linux/cache.h>
 #include <linux/fs.h>
 #include <linux/export.h>
 #include <linux/seq_file.h>
@@ -19,6 +20,8 @@
 #include <linux/uaccess.h>
 #include <asm/page.h>
 
+static struct kmem_cache *seq_file_cache __ro_after_init;
+
 static void seq_set_overflow(struct seq_file *m)
 {
        m->count = m->size;
@@ -26,7 +29,7 @@ static void seq_set_overflow(struct seq_file *m)
 
 static void *seq_buf_alloc(unsigned long size)
 {
-       return kvmalloc(size, GFP_KERNEL);
+       return kvmalloc(size, GFP_KERNEL_ACCOUNT);
 }
 
 /**
@@ -51,7 +54,7 @@ int seq_open(struct file *file, const struct seq_operations *op)
 
        WARN_ON(file->private_data);
 
-       p = kzalloc(sizeof(*p), GFP_KERNEL);
+       p = kmem_cache_zalloc(seq_file_cache, GFP_KERNEL);
        if (!p)
                return -ENOMEM;
 
@@ -366,7 +369,7 @@ int seq_release(struct inode *inode, struct file *file)
 {
        struct seq_file *m = file->private_data;
        kvfree(m->buf);
-       kfree(m);
+       kmem_cache_free(seq_file_cache, m);
        return 0;
 }
 EXPORT_SYMBOL(seq_release);
@@ -563,7 +566,7 @@ static void single_stop(struct seq_file *p, void *v)
 int single_open(struct file *file, int (*show)(struct seq_file *, void *),
                void *data)
 {
-       struct seq_operations *op = kmalloc(sizeof(*op), GFP_KERNEL);
+       struct seq_operations *op = kmalloc(sizeof(*op), GFP_KERNEL_ACCOUNT);
        int res = -ENOMEM;
 
        if (op) {
@@ -625,7 +628,7 @@ void *__seq_open_private(struct file *f, const struct seq_operations *ops,
        void *private;
        struct seq_file *seq;
 
-       private = kzalloc(psize, GFP_KERNEL);
+       private = kzalloc(psize, GFP_KERNEL_ACCOUNT);
        if (private == NULL)
                goto out;
 
@@ -673,29 +676,37 @@ void seq_puts(struct seq_file *m, const char *s)
 }
 EXPORT_SYMBOL(seq_puts);
 
-/*
+/**
  * A helper routine for putting decimal numbers without rich format of printf().
  * only 'unsigned long long' is supported.
- * This routine will put strlen(delimiter) + number into seq_file.
+ * @m: seq_file identifying the buffer to which data should be written
+ * @delimiter: a string which is printed before the number
+ * @num: the number
+ * @width: a minimum field width
+ *
+ * This routine will put strlen(delimiter) + number into seq_file.
  * This routine is very quick when you show lots of numbers.
  * In usual cases, it will be better to use seq_printf(). It's easier to read.
  */
-void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
-                        unsigned long long num)
+void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
+                        unsigned long long num, unsigned int width)
 {
        int len;
 
        if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */
                goto overflow;
 
-       len = strlen(delimiter);
-       if (m->count + len >= m->size)
-               goto overflow;
+       if (delimiter && delimiter[0]) {
+               if (delimiter[1] == 0)
+                       seq_putc(m, delimiter[0]);
+               else
+                       seq_puts(m, delimiter);
+       }
 
-       memcpy(m->buf + m->count, delimiter, len);
-       m->count += len;
+       if (!width)
+               width = 1;
 
-       if (m->count + 1 >= m->size)
+       if (m->count + width >= m->size)
                goto overflow;
 
        if (num < 10) {
@@ -703,7 +714,7 @@ void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
                return;
        }
 
-       len = num_to_str(m->buf + m->count, m->size - m->count, num);
+       len = num_to_str(m->buf + m->count, m->size - m->count, num, width);
        if (!len)
                goto overflow;
 
@@ -713,8 +724,60 @@ void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
 overflow:
        seq_set_overflow(m);
 }
+
+void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
+                        unsigned long long num)
+{
+       return seq_put_decimal_ull_width(m, delimiter, num, 0);
+}
 EXPORT_SYMBOL(seq_put_decimal_ull);
 
+/**
+ * seq_put_hex_ll - put a number in hexadecimal notation
+ * @m: seq_file identifying the buffer to which data should be written
+ * @delimiter: a string which is printed before the number
+ * @v: the number
+ * @width: a minimum field width
+ *
+ * seq_put_hex_ll(m, "", v, 8) is equal to seq_printf(m, "%08llx", v)
+ *
+ * This routine is very quick when you show lots of numbers.
+ * In usual cases, it will be better to use seq_printf(). It's easier to read.
+ */
+void seq_put_hex_ll(struct seq_file *m, const char *delimiter,
+                               unsigned long long v, unsigned int width)
+{
+       unsigned int len;
+       int i;
+
+       if (delimiter && delimiter[0]) {
+               if (delimiter[1] == 0)
+                       seq_putc(m, delimiter[0]);
+               else
+                       seq_puts(m, delimiter);
+       }
+
+       /* If x is 0, the result of __builtin_clzll is undefined */
+       if (v == 0)
+               len = 1;
+       else
+               len = (sizeof(v) * 8 - __builtin_clzll(v) + 3) / 4;
+
+       if (len < width)
+               len = width;
+
+       if (m->count + len > m->size) {
+               seq_set_overflow(m);
+               return;
+       }
+
+       for (i = len - 1; i >= 0; i--) {
+               m->buf[m->count + i] = hex_asc[0xf & v];
+               v = v >> 4;
+       }
+       m->count += len;
+}
+
 void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num)
 {
        int len;
@@ -722,12 +785,12 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num
        if (m->count + 3 >= m->size) /* we'll write 2 bytes at least */
                goto overflow;
 
-       len = strlen(delimiter);
-       if (m->count + len >= m->size)
-               goto overflow;
-
-       memcpy(m->buf + m->count, delimiter, len);
-       m->count += len;
+       if (delimiter && delimiter[0]) {
+               if (delimiter[1] == 0)
+                       seq_putc(m, delimiter[0]);
+               else
+                       seq_puts(m, delimiter);
+       }
 
        if (m->count + 2 >= m->size)
                goto overflow;
@@ -742,7 +805,7 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num
                return;
        }
 
-       len = num_to_str(m->buf + m->count, m->size - m->count, num);
+       len = num_to_str(m->buf + m->count, m->size - m->count, num, 0);
        if (!len)
                goto overflow;
 
@@ -782,8 +845,14 @@ EXPORT_SYMBOL(seq_write);
 void seq_pad(struct seq_file *m, char c)
 {
        int size = m->pad_until - m->count;
-       if (size > 0)
-               seq_printf(m, "%*s", size, "");
+       if (size > 0) {
+               if (size + m->count > m->size) {
+                       seq_set_overflow(m);
+                       return;
+               }
+               memset(m->buf + m->count, ' ', size);
+               m->count += size;
+       }
        if (c)
                seq_putc(m, c);
 }
@@ -1040,3 +1109,8 @@ seq_hlist_next_percpu(void *v, struct hlist_head __percpu *head,
        return NULL;
 }
 EXPORT_SYMBOL(seq_hlist_next_percpu);
+
+void __init seq_file_init(void)
+{
+       seq_file_cache = KMEM_CACHE(seq_file, SLAB_ACCOUNT|SLAB_PANIC);
+}
index 2dcf3d473fec1d406bf0fc2d14d73828ff81f662..9571616b5ddaf1dbee1286dcbc0870b1df11d9b8 100644 (file)
@@ -632,7 +632,7 @@ static int scan_for_idx_cb(struct ubifs_info *c,
  */
 static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c)
 {
-       struct ubifs_lprops *lprops;
+       const struct ubifs_lprops *lprops;
        struct scan_data data;
        int err;
 
index 6c3a1abd0e22c9e033da093e4f21b4d3ad9898b2..f5a46844340c0a6e8bde656f2ea48cc513d6900d 100644 (file)
@@ -244,7 +244,6 @@ static void remove_from_lpt_heap(struct ubifs_info *c,
 /**
  * lpt_heap_replace - replace lprops in a category heap.
  * @c: UBIFS file-system description object
- * @old_lprops: LEB properties to replace
  * @new_lprops: LEB properties with which to replace
  * @cat: LEB category
  *
@@ -254,7 +253,6 @@ static void remove_from_lpt_heap(struct ubifs_info *c,
  * lprops.  This function does that.
  */
 static void lpt_heap_replace(struct ubifs_info *c,
-                            struct ubifs_lprops *old_lprops,
                             struct ubifs_lprops *new_lprops, int cat)
 {
        struct ubifs_lpt_heap *heap;
@@ -362,7 +360,7 @@ void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops,
        case LPROPS_DIRTY:
        case LPROPS_DIRTY_IDX:
        case LPROPS_FREE:
-               lpt_heap_replace(c, old_lprops, new_lprops, cat);
+               lpt_heap_replace(c, new_lprops, cat);
                break;
        case LPROPS_UNCAT:
        case LPROPS_EMPTY:
index aab87340d3de8883c12bd888056d51efe454b9c8..16f03d9929e5ed7d90366992726793db16d1177d 100644 (file)
@@ -175,7 +175,6 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
 void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
                    int lnum, int offs)
 {
-       lnum = lnum;
        dbg_scan("stop scanning LEB %d at offset %d", lnum, offs);
        ubifs_assert(offs % c->min_io_size == 0);
 
index b16ef162344ab382143578d277248c88f2fdc067..6c397a389105a68f75b6c3dbaba9d7bf3e01c5af 100644 (file)
@@ -1737,8 +1737,11 @@ static void ubifs_remount_ro(struct ubifs_info *c)
 
        dbg_save_space_info(c);
 
-       for (i = 0; i < c->jhead_cnt; i++)
-               ubifs_wbuf_sync(&c->jheads[i].wbuf);
+       for (i = 0; i < c->jhead_cnt; i++) {
+               err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
+               if (err)
+                       ubifs_ro_mode(c, err);
+       }
 
        c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
        c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
@@ -1804,8 +1807,11 @@ static void ubifs_put_super(struct super_block *sb)
                        int err;
 
                        /* Synchronize write-buffers */
-                       for (i = 0; i < c->jhead_cnt; i++)
-                               ubifs_wbuf_sync(&c->jheads[i].wbuf);
+                       for (i = 0; i < c->jhead_cnt; i++) {
+                               err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
+                               if (err)
+                                       ubifs_ro_mode(c, err);
+                       }
 
                        /*
                         * We are being cleanly unmounted which means the
index 31f1f10eecd10888924ac778f9a925785b0f82d0..0ab824f574ed1488087cfefcfa4ad4d0a7539d7b 100644 (file)
@@ -1195,16 +1195,22 @@ xfs_vm_writepages(
        int                     ret;
 
        xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
-       if (dax_mapping(mapping))
-               return dax_writeback_mapping_range(mapping,
-                               xfs_find_bdev_for_inode(mapping->host), wbc);
-
        ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
        if (wpc.ioend)
                ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
        return ret;
 }
 
+STATIC int
+xfs_dax_writepages(
+       struct address_space    *mapping,
+       struct writeback_control *wbc)
+{
+       xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
+       return dax_writeback_mapping_range(mapping,
+                       xfs_find_bdev_for_inode(mapping->host), wbc);
+}
+
 /*
  * Called to move a page into cleanable state - and from there
  * to be released. The page should already be clean. We always
@@ -1367,17 +1373,6 @@ out_unlock:
        return error;
 }
 
-STATIC ssize_t
-xfs_vm_direct_IO(
-       struct kiocb            *iocb,
-       struct iov_iter         *iter)
-{
-       /*
-        * We just need the method present so that open/fcntl allow direct I/O.
-        */
-       return -EINVAL;
-}
-
 STATIC sector_t
 xfs_vm_bmap(
        struct address_space    *mapping,
@@ -1472,19 +1467,8 @@ xfs_vm_set_page_dirty(
        newly_dirty = !TestSetPageDirty(page);
        spin_unlock(&mapping->private_lock);
 
-       if (newly_dirty) {
-               /* sigh - __set_page_dirty() is static, so copy it here, too */
-               unsigned long flags;
-
-               spin_lock_irqsave(&mapping->tree_lock, flags);
-               if (page->mapping) {    /* Race with truncate? */
-                       WARN_ON_ONCE(!PageUptodate(page));
-                       account_page_dirtied(page, mapping);
-                       radix_tree_tag_set(&mapping->page_tree,
-                                       page_index(page), PAGECACHE_TAG_DIRTY);
-               }
-               spin_unlock_irqrestore(&mapping->tree_lock, flags);
-       }
+       if (newly_dirty)
+               __set_page_dirty(page, mapping, 1);
        unlock_page_memcg(page);
        if (newly_dirty)
                __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
@@ -1500,8 +1484,15 @@ const struct address_space_operations xfs_address_space_operations = {
        .releasepage            = xfs_vm_releasepage,
        .invalidatepage         = xfs_vm_invalidatepage,
        .bmap                   = xfs_vm_bmap,
-       .direct_IO              = xfs_vm_direct_IO,
+       .direct_IO              = noop_direct_IO,
        .migratepage            = buffer_migrate_page,
        .is_partially_uptodate  = block_is_partially_uptodate,
        .error_remove_page      = generic_error_remove_page,
 };
+
+const struct address_space_operations xfs_dax_aops = {
+       .writepages             = xfs_dax_writepages,
+       .direct_IO              = noop_direct_IO,
+       .set_page_dirty         = noop_set_page_dirty,
+       .invalidatepage         = noop_invalidatepage,
+};
index 88c85ea63da030ce8d02629ad6aa1fe0bb3971ad..69346d460dfaf7c613c71f3c12326291c5f68033 100644 (file)
@@ -54,6 +54,7 @@ struct xfs_ioend {
 };
 
 extern const struct address_space_operations xfs_address_space_operations;
+extern const struct address_space_operations xfs_dax_aops;
 
 int    xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
 
index e0307fbff911e5d28dadf897a033aba1954e39b3..154725b1b813b841c96b7912bcbf8492db2b660d 100644 (file)
@@ -1285,7 +1285,10 @@ xfs_setup_iops(
        case S_IFREG:
                inode->i_op = &xfs_inode_operations;
                inode->i_fop = &xfs_file_operations;
-               inode->i_mapping->a_ops = &xfs_address_space_operations;
+               if (IS_DAX(inode))
+                       inode->i_mapping->a_ops = &xfs_dax_aops;
+               else
+                       inode->i_mapping->a_ops = &xfs_address_space_operations;
                break;
        case S_IFDIR:
                if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
index d591bb77f592b21a56a0496d223ac0455b31b359..40a916efd7c039d2132014fcaf5ec780e4a8248e 100644 (file)
@@ -254,6 +254,8 @@ int acpi_processor_pstate_control(void);
 /* note: this locks both the calling module and the processor module
          if a _PPC object exists, rmmod is disallowed then */
 int acpi_processor_notify_smm(struct module *calling_module);
+int acpi_processor_get_psd(acpi_handle handle,
+                          struct acpi_psd_package *pdomain);
 
 /* parsing the _P* objects. */
 extern int acpi_processor_get_performance_info(struct acpi_processor *pr);
index 04c4cc6fd820ae28b7a072d47e5f8d74acfe9f5e..66d1d45fa2e1c11dcf17231f0a32f883bb2c1a12 100644 (file)
 #define mmiowb() do {} while (0)
 #endif
 
+#ifndef __io_br
+#define __io_br()      barrier()
+#endif
+
+/* prevent prefetching of coherent DMA data ahead of a dma-complete */
+#ifndef __io_ar
+#ifdef rmb
+#define __io_ar()      rmb()
+#else
+#define __io_ar()      barrier()
+#endif
+#endif
+
+/* flush writes to coherent DMA data before possibly triggering a DMA read */
+#ifndef __io_bw
+#ifdef wmb
+#define __io_bw()      wmb()
+#else
+#define __io_bw()      barrier()
+#endif
+#endif
+
+/* serialize device access against a spin_unlock, usually handled there. */
+#ifndef __io_aw
+#define __io_aw()      barrier()
+#endif
+
+#ifndef __io_pbw
+#define __io_pbw()     __io_bw()
+#endif
+
+#ifndef __io_paw
+#define __io_paw()     __io_aw()
+#endif
+
+#ifndef __io_pbr
+#define __io_pbr()     __io_br()
+#endif
+
+#ifndef __io_par
+#define __io_par()     __io_ar()
+#endif
+
+
 /*
  * __raw_{read,write}{b,w,l,q}() access memory in native endianness.
  *
@@ -110,7 +154,12 @@ static inline void __raw_writeq(u64 value, volatile void __iomem *addr)
 #define readb readb
 static inline u8 readb(const volatile void __iomem *addr)
 {
-       return __raw_readb(addr);
+       u8 val;
+
+       __io_br();
+       val = __raw_readb(addr);
+       __io_ar();
+       return val;
 }
 #endif
 
@@ -118,7 +167,12 @@ static inline u8 readb(const volatile void __iomem *addr)
 #define readw readw
 static inline u16 readw(const volatile void __iomem *addr)
 {
-       return __le16_to_cpu(__raw_readw(addr));
+       u16 val;
+
+       __io_br();
+       val = __le16_to_cpu(__raw_readw(addr));
+       __io_ar();
+       return val;
 }
 #endif
 
@@ -126,7 +180,12 @@ static inline u16 readw(const volatile void __iomem *addr)
 #define readl readl
 static inline u32 readl(const volatile void __iomem *addr)
 {
-       return __le32_to_cpu(__raw_readl(addr));
+       u32 val;
+
+       __io_br();
+       val = __le32_to_cpu(__raw_readl(addr));
+       __io_ar();
+       return val;
 }
 #endif
 
@@ -135,7 +194,12 @@ static inline u32 readl(const volatile void __iomem *addr)
 #define readq readq
 static inline u64 readq(const volatile void __iomem *addr)
 {
-       return __le64_to_cpu(__raw_readq(addr));
+       u64 val;
+
+       __io_br();
+       val = __le64_to_cpu(__raw_readq(addr));
+       __io_ar();
+       return val;
 }
 #endif
 #endif /* CONFIG_64BIT */
@@ -144,7 +208,9 @@ static inline u64 readq(const volatile void __iomem *addr)
 #define writeb writeb
 static inline void writeb(u8 value, volatile void __iomem *addr)
 {
+       __io_bw();
        __raw_writeb(value, addr);
+       __io_aw();
 }
 #endif
 
@@ -152,7 +218,9 @@ static inline void writeb(u8 value, volatile void __iomem *addr)
 #define writew writew
 static inline void writew(u16 value, volatile void __iomem *addr)
 {
+       __io_bw();
        __raw_writew(cpu_to_le16(value), addr);
+       __io_aw();
 }
 #endif
 
@@ -160,7 +228,9 @@ static inline void writew(u16 value, volatile void __iomem *addr)
 #define writel writel
 static inline void writel(u32 value, volatile void __iomem *addr)
 {
+       __io_bw();
        __raw_writel(__cpu_to_le32(value), addr);
+       __io_aw();
 }
 #endif
 
@@ -169,7 +239,9 @@ static inline void writel(u32 value, volatile void __iomem *addr)
 #define writeq writeq
 static inline void writeq(u64 value, volatile void __iomem *addr)
 {
+       __io_bw();
        __raw_writeq(__cpu_to_le64(value), addr);
+       __io_aw();
 }
 #endif
 #endif /* CONFIG_64BIT */
@@ -180,35 +252,67 @@ static inline void writeq(u64 value, volatile void __iomem *addr)
  * accesses.
  */
 #ifndef readb_relaxed
-#define readb_relaxed readb
+#define readb_relaxed readb_relaxed
+static inline u8 readb_relaxed(const volatile void __iomem *addr)
+{
+       return __raw_readb(addr);
+}
 #endif
 
 #ifndef readw_relaxed
-#define readw_relaxed readw
+#define readw_relaxed readw_relaxed
+static inline u16 readw_relaxed(const volatile void __iomem *addr)
+{
+       return __le16_to_cpu(__raw_readw(addr));
+}
 #endif
 
 #ifndef readl_relaxed
-#define readl_relaxed readl
+#define readl_relaxed readl_relaxed
+static inline u32 readl_relaxed(const volatile void __iomem *addr)
+{
+       return __le32_to_cpu(__raw_readl(addr));
+}
 #endif
 
 #if defined(readq) && !defined(readq_relaxed)
-#define readq_relaxed readq
+#define readq_relaxed readq_relaxed
+static inline u64 readq_relaxed(const volatile void __iomem *addr)
+{
+       return __le64_to_cpu(__raw_readq(addr));
+}
 #endif
 
 #ifndef writeb_relaxed
-#define writeb_relaxed writeb
+#define writeb_relaxed writeb_relaxed
+static inline void writeb_relaxed(u8 value, volatile void __iomem *addr)
+{
+       __raw_writeb(value, addr);
+}
 #endif
 
 #ifndef writew_relaxed
-#define writew_relaxed writew
+#define writew_relaxed writew_relaxed
+static inline void writew_relaxed(u16 value, volatile void __iomem *addr)
+{
+       __raw_writew(cpu_to_le16(value), addr);
+}
 #endif
 
 #ifndef writel_relaxed
-#define writel_relaxed writel
+#define writel_relaxed writel_relaxed
+static inline void writel_relaxed(u32 value, volatile void __iomem *addr)
+{
+       __raw_writel(__cpu_to_le32(value), addr);
+}
 #endif
 
 #if defined(writeq) && !defined(writeq_relaxed)
-#define writeq_relaxed writeq
+#define writeq_relaxed writeq_relaxed
+static inline void writeq_relaxed(u64 value, volatile void __iomem *addr)
+{
+       __raw_writeq(__cpu_to_le64(value), addr);
+}
 #endif
 
 /*
@@ -363,7 +467,12 @@ static inline void writesq(volatile void __iomem *addr, const void *buffer,
 #define inb inb
 static inline u8 inb(unsigned long addr)
 {
-       return readb(PCI_IOBASE + addr);
+       u8 val;
+
+       __io_pbr();
+       val = __raw_readb(PCI_IOBASE + addr);
+       __io_par();
+       return val;
 }
 #endif
 
@@ -371,7 +480,12 @@ static inline u8 inb(unsigned long addr)
 #define inw inw
 static inline u16 inw(unsigned long addr)
 {
-       return readw(PCI_IOBASE + addr);
+       u16 val;
+
+       __io_pbr();
+       val = __le16_to_cpu(__raw_readw(PCI_IOBASE + addr));
+       __io_par();
+       return val;
 }
 #endif
 
@@ -379,7 +493,12 @@ static inline u16 inw(unsigned long addr)
 #define inl inl
 static inline u32 inl(unsigned long addr)
 {
-       return readl(PCI_IOBASE + addr);
+       u32 val;
+
+       __io_pbr();
+       val = __le32_to_cpu(__raw_readl(PCI_IOBASE + addr));
+       __io_par();
+       return val;
 }
 #endif
 
@@ -387,7 +506,9 @@ static inline u32 inl(unsigned long addr)
 #define outb outb
 static inline void outb(u8 value, unsigned long addr)
 {
-       writeb(value, PCI_IOBASE + addr);
+       __io_pbw();
+       __raw_writeb(value, PCI_IOBASE + addr);
+       __io_paw();
 }
 #endif
 
@@ -395,7 +516,9 @@ static inline void outb(u8 value, unsigned long addr)
 #define outw outw
 static inline void outw(u16 value, unsigned long addr)
 {
-       writew(value, PCI_IOBASE + addr);
+       __io_pbw();
+       __raw_writew(cpu_to_le16(value), PCI_IOBASE + addr);
+       __io_paw();
 }
 #endif
 
@@ -403,7 +526,9 @@ static inline void outw(u16 value, unsigned long addr)
 #define outl outl
 static inline void outl(u32 value, unsigned long addr)
 {
-       writel(value, PCI_IOBASE + addr);
+       __io_pbw();
+       __raw_writel(cpu_to_le32(value), PCI_IOBASE + addr);
+       __io_paw();
 }
 #endif
 
index 2f7a29242b87772ba8cb3f352949c0a83191f2b4..38cd77b39a64a6731be68f3bdef40cbca35478dc 100644 (file)
@@ -26,7 +26,8 @@
 #define IORT_IRQ_MASK(irq)             (irq & 0xffffffffULL)
 #define IORT_IRQ_TRIGGER_MASK(irq)     ((irq >> 32) & 0xffffffffULL)
 
-int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node);
+int iort_register_domain_token(int trans_id, phys_addr_t base,
+                              struct fwnode_handle *fw_node);
 void iort_deregister_domain_token(int trans_id);
 struct fwnode_handle *iort_find_domain_token(int trans_id);
 #ifdef CONFIG_ACPI_IORT
@@ -38,6 +39,7 @@ int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id);
 /* IOMMU interface */
 void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size);
 const struct iommu_ops *iort_iommu_configure(struct device *dev);
+int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head);
 #else
 static inline void acpi_iort_init(void) { }
 static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id)
@@ -52,6 +54,9 @@ static inline void iort_dma_setup(struct device *dev, u64 *dma_addr,
 static inline const struct iommu_ops *iort_iommu_configure(
                                      struct device *dev)
 { return NULL; }
+static inline
+int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
+{ return 0; }
 #endif
 
 #endif /* __ACPI_IORT_H__ */
index 3e4ce54d84ab2f50293a6f11dbe5d10ce17989d3..09da0f1246995cf19c04ec35a25733a0119fb4ad 100644 (file)
@@ -175,7 +175,7 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
 }
 
 long congestion_wait(int sync, long timeout);
-long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout);
+long wait_iff_congested(int sync, long timeout);
 
 static inline bool bdi_cap_synchronous_io(struct backing_dev_info *bdi)
 {
@@ -329,7 +329,7 @@ static inline bool inode_to_wb_is_valid(struct inode *inode)
  * @inode: inode of interest
  *
  * Returns the wb @inode is currently associated with.  The caller must be
- * holding either @inode->i_lock, @inode->i_mapping->tree_lock, or the
+ * holding either @inode->i_lock, the i_pages lock, or the
  * associated wb's list_lock.
  */
 static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
@@ -337,7 +337,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
 #ifdef CONFIG_LOCKDEP
        WARN_ON_ONCE(debug_locks &&
                     (!lockdep_is_held(&inode->i_lock) &&
-                     !lockdep_is_held(&inode->i_mapping->tree_lock) &&
+                     !lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) &&
                      !lockdep_is_held(&inode->i_wb->list_lock)));
 #endif
        return inode->i_wb;
@@ -349,7 +349,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
  * @lockedp: temp bool output param, to be passed to the end function
  *
  * The caller wants to access the wb associated with @inode but isn't
- * holding inode->i_lock, mapping->tree_lock or wb->list_lock.  This
+ * holding inode->i_lock, the i_pages lock or wb->list_lock.  This
  * function determines the wb associated with @inode and ensures that the
  * association doesn't change until the transaction is finished with
  * unlocked_inode_to_wb_end().
@@ -370,11 +370,11 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
        *lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;
 
        if (unlikely(*lockedp))
-               spin_lock_irq(&inode->i_mapping->tree_lock);
+               xa_lock_irq(&inode->i_mapping->i_pages);
 
        /*
-        * Protected by either !I_WB_SWITCH + rcu_read_lock() or tree_lock.
-        * inode_to_wb() will bark.  Deref directly.
+        * Protected by either !I_WB_SWITCH + rcu_read_lock() or the i_pages
+        * lock.  inode_to_wb() will bark.  Deref directly.
         */
        return inode->i_wb;
 }
@@ -387,7 +387,7 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
 static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
 {
        if (unlikely(locked))
-               spin_unlock_irq(&inode->i_mapping->tree_lock);
+               xa_unlock_irq(&inode->i_mapping->i_pages);
 
        rcu_read_unlock();
 }
index b0abe21d6cc9a18310bcb9eea6b16ab762be5272..4955e0863b83d456ba467f291d335b01bc1aa2a6 100644 (file)
@@ -61,6 +61,8 @@ struct linux_binprm {
        unsigned interp_flags;
        unsigned interp_data;
        unsigned long loader, exec;
+
+       struct rlimit rlim_stack; /* Saved RLIMIT_STACK used during exec. */
 } __randomize_layout;
 
 #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
@@ -118,6 +120,7 @@ extern int __must_check remove_arg_zero(struct linux_binprm *);
 extern int search_binary_handler(struct linux_binprm *);
 extern int flush_old_exec(struct linux_binprm * bprm);
 extern void setup_new_exec(struct linux_binprm * bprm);
+extern void finalize_exec(struct linux_binprm *bprm);
 extern void would_dump(struct linux_binprm *, struct file *);
 
 extern int suid_dumpable;
index 59042d5ac52026a5f43416e6a92a2b70166e63b3..3901927cf6a0618122798e8b16491099d35e2cda 100644 (file)
@@ -204,6 +204,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
         CEPH_FEATURE_OSD_PRIMARY_AFFINITY |    \
         CEPH_FEATURE_MSGR_KEEPALIVE2 |         \
         CEPH_FEATURE_OSD_POOLRESEND |          \
+        CEPH_FEATURE_MDS_QUOTA |               \
         CEPH_FEATURE_CRUSH_V4 |                \
         CEPH_FEATURE_NEW_OSDOP_ENCODING |      \
         CEPH_FEATURE_SERVER_JEWEL |            \
index 88dd51381aaf9d72e18f277dee6173a214bcb8c8..7ecfc88314d835605d72189e7458f362bb6a1632 100644 (file)
@@ -134,6 +134,7 @@ struct ceph_dir_layout {
 #define CEPH_MSG_CLIENT_LEASE           0x311
 #define CEPH_MSG_CLIENT_SNAP            0x312
 #define CEPH_MSG_CLIENT_CAPRELEASE      0x313
+#define CEPH_MSG_CLIENT_QUOTA           0x314
 
 /* pool ops */
 #define CEPH_MSG_POOLOP_REPLY           48
@@ -807,4 +808,20 @@ struct ceph_mds_snap_realm {
 } __attribute__ ((packed));
 /* followed by my snap list, then prior parent snap list */
 
+/*
+ * quotas
+ */
+struct ceph_mds_quota {
+       __le64 ino;             /* ino */
+       struct ceph_timespec rctime;
+       __le64 rbytes;          /* dir stats */
+       __le64 rfiles;
+       __le64 rsubdirs;
+       __u8 struct_v;          /* compat */
+       __u8 struct_compat;
+       __le32 struct_len;
+       __le64 max_bytes;       /* quota max. bytes */
+       __le64 max_files;       /* quota max. files */
+} __attribute__ ((packed));
+
 #endif
index c2ec44cf5098af4d247cc22d124625eef32fb471..49c93b9308d70543c865f437bd3390ef6e7be73e 100644 (file)
@@ -262,6 +262,7 @@ extern struct kmem_cache *ceph_cap_cachep;
 extern struct kmem_cache *ceph_cap_flush_cachep;
 extern struct kmem_cache *ceph_dentry_cachep;
 extern struct kmem_cache *ceph_file_cachep;
+extern struct kmem_cache *ceph_dir_file_cachep;
 
 /* ceph_common.c */
 extern bool libceph_compatible(void *data);
index ead9d85f1c1114412c4f83688e377e7f9f0c6294..c7dfcb8a1fb2fcb5e253ab0202955a0752d5b844 100644 (file)
@@ -76,6 +76,7 @@ enum ceph_msg_data_type {
 #ifdef CONFIG_BLOCK
        CEPH_MSG_DATA_BIO,      /* data source/destination is a bio list */
 #endif /* CONFIG_BLOCK */
+       CEPH_MSG_DATA_BVECS,    /* data source/destination is a bio_vec array */
 };
 
 static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type)
@@ -87,22 +88,106 @@ static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type)
 #ifdef CONFIG_BLOCK
        case CEPH_MSG_DATA_BIO:
 #endif /* CONFIG_BLOCK */
+       case CEPH_MSG_DATA_BVECS:
                return true;
        default:
                return false;
        }
 }
 
+#ifdef CONFIG_BLOCK
+
+struct ceph_bio_iter {
+       struct bio *bio;
+       struct bvec_iter iter;
+};
+
+#define __ceph_bio_iter_advance_step(it, n, STEP) do {                       \
+       unsigned int __n = (n), __cur_n;                                      \
+                                                                             \
+       while (__n) {                                                         \
+               BUG_ON(!(it)->iter.bi_size);                                  \
+               __cur_n = min((it)->iter.bi_size, __n);                       \
+               (void)(STEP);                                                 \
+               bio_advance_iter((it)->bio, &(it)->iter, __cur_n);            \
+               if (!(it)->iter.bi_size && (it)->bio->bi_next) {              \
+                       dout("__ceph_bio_iter_advance_step next bio\n");      \
+                       (it)->bio = (it)->bio->bi_next;                       \
+                       (it)->iter = (it)->bio->bi_iter;                      \
+               }                                                             \
+               __n -= __cur_n;                                               \
+       }                                                                     \
+} while (0)
+
+/*
+ * Advance @it by @n bytes.
+ */
+#define ceph_bio_iter_advance(it, n)                                         \
+       __ceph_bio_iter_advance_step(it, n, 0)
+
+/*
+ * Advance @it by @n bytes, executing BVEC_STEP for each bio_vec.
+ */
+#define ceph_bio_iter_advance_step(it, n, BVEC_STEP)                         \
+       __ceph_bio_iter_advance_step(it, n, ({                                \
+               struct bio_vec bv;                                            \
+               struct bvec_iter __cur_iter;                                  \
+                                                                             \
+               __cur_iter = (it)->iter;                                      \
+               __cur_iter.bi_size = __cur_n;                                 \
+               __bio_for_each_segment(bv, (it)->bio, __cur_iter, __cur_iter) \
+                       (void)(BVEC_STEP);                                    \
+       }))
+
+#endif /* CONFIG_BLOCK */
+
+struct ceph_bvec_iter {
+       struct bio_vec *bvecs;
+       struct bvec_iter iter;
+};
+
+#define __ceph_bvec_iter_advance_step(it, n, STEP) do {                              \
+       BUG_ON((n) > (it)->iter.bi_size);                                     \
+       (void)(STEP);                                                         \
+       bvec_iter_advance((it)->bvecs, &(it)->iter, (n));                     \
+} while (0)
+
+/*
+ * Advance @it by @n bytes.
+ */
+#define ceph_bvec_iter_advance(it, n)                                        \
+       __ceph_bvec_iter_advance_step(it, n, 0)
+
+/*
+ * Advance @it by @n bytes, executing BVEC_STEP for each bio_vec.
+ */
+#define ceph_bvec_iter_advance_step(it, n, BVEC_STEP)                        \
+       __ceph_bvec_iter_advance_step(it, n, ({                               \
+               struct bio_vec bv;                                            \
+               struct bvec_iter __cur_iter;                                  \
+                                                                             \
+               __cur_iter = (it)->iter;                                      \
+               __cur_iter.bi_size = (n);                                     \
+               for_each_bvec(bv, (it)->bvecs, __cur_iter, __cur_iter)        \
+                       (void)(BVEC_STEP);                                    \
+       }))
+
+#define ceph_bvec_iter_shorten(it, n) do {                                   \
+       BUG_ON((n) > (it)->iter.bi_size);                                     \
+       (it)->iter.bi_size = (n);                                             \
+} while (0)
+
 struct ceph_msg_data {
        struct list_head                links;  /* ceph_msg->data */
        enum ceph_msg_data_type         type;
        union {
 #ifdef CONFIG_BLOCK
                struct {
-                       struct bio      *bio;
-                       size_t          bio_length;
+                       struct ceph_bio_iter    bio_pos;
+                       u32                     bio_length;
                };
 #endif /* CONFIG_BLOCK */
+               struct ceph_bvec_iter   bvec_pos;
                struct {
                        struct page     **pages;        /* NOT OWNER. */
                        size_t          length;         /* total # bytes */
@@ -122,11 +207,9 @@ struct ceph_msg_data_cursor {
        bool                    need_crc;       /* crc update needed */
        union {
 #ifdef CONFIG_BLOCK
-               struct {                                /* bio */
-                       struct bio      *bio;           /* bio from list */
-                       struct bvec_iter bvec_iter;
-               };
+               struct ceph_bio_iter    bio_iter;
 #endif /* CONFIG_BLOCK */
+               struct bvec_iter        bvec_iter;
                struct {                                /* pages */
                        unsigned int    page_offset;    /* offset in page */
                        unsigned short  page_index;     /* index in array */
@@ -290,9 +373,11 @@ extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
 extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
                                struct ceph_pagelist *pagelist);
 #ifdef CONFIG_BLOCK
-extern void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
-                               size_t length);
+void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
+                          u32 length);
 #endif /* CONFIG_BLOCK */
+void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
+                            struct ceph_bvec_iter *bvec_pos);
 
 extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
                                     bool can_fail);
index 52fb37d1c2a5fa2fcd5d54c71dfe231bb926f4be..528ccc943cee0a5da4bc95e5d200a8591debe836 100644 (file)
@@ -57,6 +57,7 @@ enum ceph_osd_data_type {
 #ifdef CONFIG_BLOCK
        CEPH_OSD_DATA_TYPE_BIO,
 #endif /* CONFIG_BLOCK */
+       CEPH_OSD_DATA_TYPE_BVECS,
 };
 
 struct ceph_osd_data {
@@ -72,10 +73,11 @@ struct ceph_osd_data {
                struct ceph_pagelist    *pagelist;
 #ifdef CONFIG_BLOCK
                struct {
-                       struct bio      *bio;           /* list of bios */
-                       size_t          bio_length;     /* total in list */
+                       struct ceph_bio_iter    bio_pos;
+                       u32                     bio_length;
                };
 #endif /* CONFIG_BLOCK */
+               struct ceph_bvec_iter   bvec_pos;
        };
 };
 
@@ -405,10 +407,14 @@ extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *,
                                        unsigned int which,
                                        struct ceph_pagelist *pagelist);
 #ifdef CONFIG_BLOCK
-extern void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *,
-                                       unsigned int which,
-                                       struct bio *bio, size_t bio_length);
+void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
+                                   unsigned int which,
+                                   struct ceph_bio_iter *bio_pos,
+                                   u32 bio_length);
 #endif /* CONFIG_BLOCK */
+void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
+                                        unsigned int which,
+                                        struct ceph_bvec_iter *bvec_pos);
 
 extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
                                        unsigned int which,
@@ -418,6 +424,9 @@ extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
                                        struct page **pages, u64 length,
                                        u32 alignment, bool pages_from_pool,
                                        bool own_pages);
+void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
+                                      unsigned int which,
+                                      struct bio_vec *bvecs, u32 bytes);
 extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
                                        unsigned int which,
                                        struct page **pages, u64 length,
index d41fad99c0fa7ece3c05fa1478f162c82da7e2ff..e71fb222c7c3640e46426dc5a0de96d6814d96df 100644 (file)
@@ -5,7 +5,6 @@
 #include <linux/rbtree.h>
 #include <linux/ceph/types.h>
 #include <linux/ceph/decode.h>
-#include <linux/ceph/ceph_fs.h>
 #include <linux/crush/crush.h>
 
 /*
@@ -280,11 +279,6 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting,
                       const struct ceph_osds *new_acting,
                       bool any_change);
 
-/* calculate mapping of a file extent to an object */
-extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
-                                        u64 off, u64 len,
-                                        u64 *bno, u64 *oxoff, u64 *oxlen);
-
 int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
                                const struct ceph_object_id *oid,
                                const struct ceph_object_locator *oloc,
diff --git a/include/linux/ceph/striper.h b/include/linux/ceph/striper.h
new file mode 100644 (file)
index 0000000..cbd0d24
--- /dev/null
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CEPH_STRIPER_H
+#define _LINUX_CEPH_STRIPER_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+
+struct ceph_file_layout;
+
+void ceph_calc_file_object_mapping(struct ceph_file_layout *l,
+                                  u64 off, u64 len,
+                                  u64 *objno, u64 *objoff, u32 *xlen);
+
+struct ceph_object_extent {
+       struct list_head oe_item;
+       u64 oe_objno;
+       u64 oe_off;
+       u64 oe_len;
+};
+
+static inline void ceph_object_extent_init(struct ceph_object_extent *ex)
+{
+       INIT_LIST_HEAD(&ex->oe_item);
+}
+
+/*
+ * Called for each mapped stripe unit.
+ *
+ * @bytes: number of bytes mapped, i.e. the minimum of the full length
+ *         requested (file extent length) or the remainder of the stripe
+ *         unit within an object
+ */
+typedef void (*ceph_object_extent_fn_t)(struct ceph_object_extent *ex,
+                                       u32 bytes, void *arg);
+
+int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len,
+                        struct list_head *object_extents,
+                        struct ceph_object_extent *alloc_fn(void *arg),
+                        void *alloc_arg,
+                        ceph_object_extent_fn_t action_fn,
+                        void *action_arg);
+int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len,
+                        struct list_head *object_extents,
+                        ceph_object_extent_fn_t action_fn,
+                        void *action_arg);
+
+struct ceph_file_extent {
+       u64 fe_off;
+       u64 fe_len;
+};
+
+static inline u64 ceph_file_extents_bytes(struct ceph_file_extent *file_extents,
+                                         u32 num_file_extents)
+{
+       u64 bytes = 0;
+       u32 i;
+
+       for (i = 0; i < num_file_extents; i++)
+               bytes += file_extents[i].fe_len;
+
+       return bytes;
+}
+
+int ceph_extent_to_file(struct ceph_file_layout *l,
+                       u64 objno, u64 objoff, u64 objlen,
+                       struct ceph_file_extent **file_extents,
+                       u32 *num_file_extents);
+
+#endif
index d3f264a5b04d9c999a94273c18272416576b17c1..ceb96ecab96e255be42cecc015be8c0e01b4f710 100644 (file)
@@ -17,9 +17,6 @@
  */
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
-#define randomized_struct_fields_start struct {
-#define randomized_struct_fields_end   };
-
 /* all clang versions usable with the kernel support KASAN ABI version 5 */
 #define KASAN_ABI_VERSION 5
 
index e2c7f4369effdbcf9cb46b1904c3cbe84debd2ca..b4bf73f5e38f0018ea85a85d7d8b1293222ad02c 100644 (file)
 #if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__)
 #define __randomize_layout __attribute__((randomize_layout))
 #define __no_randomize_layout __attribute__((no_randomize_layout))
+/* This anon struct can add padding, so only enable it under randstruct. */
+#define randomized_struct_fields_start struct {
+#define randomized_struct_fields_end   } __randomize_layout;
 #endif
 
 #endif /* GCC_VERSION >= 40500 */
  */
 #define __visible      __attribute__((externally_visible))
 
-/*
- * RANDSTRUCT_PLUGIN wants to use an anonymous struct, but it is only
- * possible since GCC 4.6. To provide as much build testing coverage
- * as possible, this is used for all GCC 4.6+ builds, and not just on
- * RANDSTRUCT_PLUGIN builds.
- */
-#define randomized_struct_fields_start struct {
-#define randomized_struct_fields_end   } __randomize_layout;
-
 #endif /* GCC_VERSION >= 40600 */
 
 
diff --git a/include/linux/const.h b/include/linux/const.h
new file mode 100644 (file)
index 0000000..7b55a55
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _LINUX_CONST_H
+#define _LINUX_CONST_H
+
+#include <uapi/linux/const.h>
+
+#define UL(x)          (_UL(x))
+#define ULL(x)         (_ULL(x))
+
+#endif /* _LINUX_CONST_H */
index 1fe49724da9e4e5ad5cc77342249b17273c9463d..87f48dd932eb1ed424c062c8b3e9bb8ab4365cc4 100644 (file)
@@ -960,8 +960,6 @@ extern void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
 extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;
 extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs;
 extern struct freq_attr *cpufreq_generic_attr[];
-int cpufreq_table_validate_and_show(struct cpufreq_policy *policy,
-                                     struct cpufreq_frequency_table *table);
 int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy);
 
 unsigned int cpufreq_generic_get(unsigned int cpu);
index a806e94c482f98805ee6a68d3f9a8055f5ca77b2..1eefabf1621f40bd9269804c35bebbc539747515 100644 (file)
@@ -135,7 +135,8 @@ extern bool cpuidle_not_available(struct cpuidle_driver *drv,
                                  struct cpuidle_device *dev);
 
 extern int cpuidle_select(struct cpuidle_driver *drv,
-                         struct cpuidle_device *dev);
+                         struct cpuidle_device *dev,
+                         bool *stop_tick);
 extern int cpuidle_enter(struct cpuidle_driver *drv,
                         struct cpuidle_device *dev, int index);
 extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
@@ -167,7 +168,7 @@ static inline bool cpuidle_not_available(struct cpuidle_driver *drv,
                                         struct cpuidle_device *dev)
 {return true; }
 static inline int cpuidle_select(struct cpuidle_driver *drv,
-                                struct cpuidle_device *dev)
+                                struct cpuidle_device *dev, bool *stop_tick)
 {return -ENODEV; }
 static inline int cpuidle_enter(struct cpuidle_driver *drv,
                                struct cpuidle_device *dev, int index)
@@ -250,7 +251,8 @@ struct cpuidle_governor {
                                        struct cpuidle_device *dev);
 
        int  (*select)          (struct cpuidle_driver *drv,
-                                       struct cpuidle_device *dev);
+                                       struct cpuidle_device *dev,
+                                       bool *stop_tick);
        void (*reflect)         (struct cpuidle_device *dev, int index);
 };
 
index 0185ecdae1352d53f073d398b239c543c4d367d5..f9eb22ad341e4a6dc095e94cc2f2e22ce7649186 100644 (file)
@@ -26,18 +26,42 @@ extern struct attribute_group dax_attribute_group;
 
 #if IS_ENABLED(CONFIG_DAX)
 struct dax_device *dax_get_by_host(const char *host);
+struct dax_device *alloc_dax(void *private, const char *host,
+               const struct dax_operations *ops);
 void put_dax(struct dax_device *dax_dev);
+void kill_dax(struct dax_device *dax_dev);
+void dax_write_cache(struct dax_device *dax_dev, bool wc);
+bool dax_write_cache_enabled(struct dax_device *dax_dev);
 #else
 static inline struct dax_device *dax_get_by_host(const char *host)
 {
        return NULL;
 }
-
+static inline struct dax_device *alloc_dax(void *private, const char *host,
+               const struct dax_operations *ops)
+{
+       /*
+        * Callers should check IS_ENABLED(CONFIG_DAX) to know if this
+        * NULL is an error or expected.
+        */
+       return NULL;
+}
 static inline void put_dax(struct dax_device *dax_dev)
 {
 }
+static inline void kill_dax(struct dax_device *dax_dev)
+{
+}
+static inline void dax_write_cache(struct dax_device *dax_dev, bool wc)
+{
+}
+static inline bool dax_write_cache_enabled(struct dax_device *dax_dev)
+{
+       return false;
+}
 #endif
 
+struct writeback_control;
 int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
 #if IS_ENABLED(CONFIG_FS_DAX)
 int __bdev_dax_supported(struct super_block *sb, int blocksize);
@@ -57,6 +81,8 @@ static inline void fs_put_dax(struct dax_device *dax_dev)
 }
 
 struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev);
+int dax_writeback_mapping_range(struct address_space *mapping,
+               struct block_device *bdev, struct writeback_control *wbc);
 #else
 static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
 {
@@ -76,22 +102,23 @@ static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
 {
        return NULL;
 }
+
+static inline int dax_writeback_mapping_range(struct address_space *mapping,
+               struct block_device *bdev, struct writeback_control *wbc)
+{
+       return -EOPNOTSUPP;
+}
 #endif
 
 int dax_read_lock(void);
 void dax_read_unlock(int id);
-struct dax_device *alloc_dax(void *private, const char *host,
-               const struct dax_operations *ops);
 bool dax_alive(struct dax_device *dax_dev);
-void kill_dax(struct dax_device *dax_dev);
 void *dax_get_private(struct dax_device *dax_dev);
 long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
                void **kaddr, pfn_t *pfn);
 size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
                size_t bytes, struct iov_iter *i);
 void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
-void dax_write_cache(struct dax_device *dax_dev, bool wc);
-bool dax_write_cache_enabled(struct dax_device *dax_dev);
 
 ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
                const struct iomap_ops *ops);
@@ -121,7 +148,4 @@ static inline bool dax_mapping(struct address_space *mapping)
        return mapping->host && IS_DAX(mapping->host);
 }
 
-struct writeback_control;
-int dax_writeback_mapping_range(struct address_space *mapping,
-               struct block_device *bdev, struct writeback_control *wbc);
 #endif
index f838764993eb366d8813052835e58c6a60f57aca..861be5cab1dff22b5ae2892eab265585a0c2de29 100644 (file)
@@ -470,7 +470,11 @@ typedef void (*dma_async_tx_callback_result)(void *dma_async_param,
                                const struct dmaengine_result *result);
 
 struct dmaengine_unmap_data {
+#if IS_ENABLED(CONFIG_DMA_ENGINE_RAID)
+       u16 map_cnt;
+#else
        u8 map_cnt;
+#endif
        u8 to_cnt;
        u8 from_cnt;
        u8 bidi_cnt;
index 3a5c19d9f651aab263f44703001cba8b9a77ca58..5d93995743b5be6d3809ef9d893646a65f496356 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/list_lru.h>
 #include <linux/llist.h>
 #include <linux/radix-tree.h>
+#include <linux/xarray.h>
 #include <linux/rbtree.h>
 #include <linux/init.h>
 #include <linux/pid.h>
@@ -390,12 +391,11 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
 
 struct address_space {
        struct inode            *host;          /* owner: inode, block_device */
-       struct radix_tree_root  page_tree;      /* radix tree of all pages */
-       spinlock_t              tree_lock;      /* and lock protecting it */
+       struct radix_tree_root  i_pages;        /* cached pages */
        atomic_t                i_mmap_writable;/* count VM_SHARED mappings */
        struct rb_root_cached   i_mmap;         /* tree of private and shared mappings */
        struct rw_semaphore     i_mmap_rwsem;   /* protect tree, count, list */
-       /* Protected by tree_lock together with the radix tree */
+       /* Protected by the i_pages lock */
        unsigned long           nrpages;        /* number of total pages */
        /* number of shadow or DAX exceptional entries */
        unsigned long           nrexceptional;
@@ -1989,7 +1989,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
  *
  * I_WB_SWITCH         Cgroup bdi_writeback switching in progress.  Used to
  *                     synchronize competing switching instances and to tell
- *                     wb stat updates to grab mapping->tree_lock.  See
+ *                     wb stat updates to grab the i_pages lock.  See
  *                     inode_switch_wb_work_fn() for details.
  *
  * I_OVL_INUSE         Used by overlayfs to get exclusive ownership on upper
@@ -3127,6 +3127,10 @@ extern int simple_rmdir(struct inode *, struct dentry *);
 extern int simple_rename(struct inode *, struct dentry *,
                         struct inode *, struct dentry *, unsigned int);
 extern int noop_fsync(struct file *, loff_t, loff_t, int);
+extern int noop_set_page_dirty(struct page *page);
+extern void noop_invalidatepage(struct page *page, unsigned int offset,
+               unsigned int length);
+extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
 extern int simple_empty(struct dentry *);
 extern int simple_readpage(struct file *file, struct page *page);
 extern int simple_write_begin(struct file *file, struct address_space *mapping,
index 325017ad9311749820e648949d987f42ef5e0105..39988924de3aa08f5c3c6d0c2ac0773d4d78a269 100644 (file)
 struct hmm;
 
 /*
- * hmm_pfn_t - HMM uses its own pfn type to keep several flags per page
+ * hmm_pfn_flag_e - HMM flag enums
  *
  * Flags:
- * HMM_PFN_VALID: pfn is valid
- * HMM_PFN_READ:  CPU page table has read permission set
+ * HMM_PFN_VALID: pfn is valid. It has, at least, read permission.
  * HMM_PFN_WRITE: CPU page table has write permission set
+ * HMM_PFN_DEVICE_PRIVATE: private device memory (ZONE_DEVICE)
+ *
+ * The driver provide a flags array, if driver valid bit for an entry is bit
+ * 3 ie (entry & (1 << 3)) is true if entry is valid then driver must provide
+ * an array in hmm_range.flags with hmm_range.flags[HMM_PFN_VALID] == 1 << 3.
+ * Same logic apply to all flags. This is same idea as vm_page_prot in vma
+ * except that this is per device driver rather than per architecture.
+ */
+enum hmm_pfn_flag_e {
+       HMM_PFN_VALID = 0,
+       HMM_PFN_WRITE,
+       HMM_PFN_DEVICE_PRIVATE,
+       HMM_PFN_FLAG_MAX
+};
+
+/*
+ * hmm_pfn_value_e - HMM pfn special value
+ *
+ * Flags:
  * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory
- * HMM_PFN_EMPTY: corresponding CPU page table entry is pte_none()
+ * HMM_PFN_NONE: corresponding CPU page table entry is pte_none()
  * HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the
  *      result of vm_insert_pfn() or vm_insert_page(). Therefore, it should not
  *      be mirrored by a device, because the entry will never have HMM_PFN_VALID
  *      set and the pfn value is undefined.
- * HMM_PFN_DEVICE_UNADDRESSABLE: unaddressable device memory (ZONE_DEVICE)
+ *
+ * Driver provide entry value for none entry, error entry and special entry,
+ * driver can alias (ie use same value for error and special for instance). It
+ * should not alias none and error or special.
+ *
+ * HMM pfn value returned by hmm_vma_get_pfns() or hmm_vma_fault() will be:
+ * hmm_range.values[HMM_PFN_ERROR] if CPU page table entry is poisonous,
+ * hmm_range.values[HMM_PFN_NONE] if there is no CPU page table
+ * hmm_range.values[HMM_PFN_SPECIAL] if CPU page table entry is a special one
  */
-typedef unsigned long hmm_pfn_t;
+enum hmm_pfn_value_e {
+       HMM_PFN_ERROR,
+       HMM_PFN_NONE,
+       HMM_PFN_SPECIAL,
+       HMM_PFN_VALUE_MAX
+};
 
-#define HMM_PFN_VALID (1 << 0)
-#define HMM_PFN_READ (1 << 1)
-#define HMM_PFN_WRITE (1 << 2)
-#define HMM_PFN_ERROR (1 << 3)
-#define HMM_PFN_EMPTY (1 << 4)
-#define HMM_PFN_SPECIAL (1 << 5)
-#define HMM_PFN_DEVICE_UNADDRESSABLE (1 << 6)
-#define HMM_PFN_SHIFT 7
+/*
+ * struct hmm_range - track invalidation lock on virtual address range
+ *
+ * @vma: the vm area struct for the range
+ * @list: all range lock are on a list
+ * @start: range virtual start address (inclusive)
+ * @end: range virtual end address (exclusive)
+ * @pfns: array of pfns (big enough for the range)
+ * @flags: pfn flags to match device driver page table
+ * @values: pfn value for some special case (none, special, error, ...)
+ * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT)
+ * @valid: pfns array did not change since it has been fill by an HMM function
+ */
+struct hmm_range {
+       struct vm_area_struct   *vma;
+       struct list_head        list;
+       unsigned long           start;
+       unsigned long           end;
+       uint64_t                *pfns;
+       const uint64_t          *flags;
+       const uint64_t          *values;
+       uint8_t                 pfn_shift;
+       bool                    valid;
+};
 
 /*
- * hmm_pfn_t_to_page() - return struct page pointed to by a valid hmm_pfn_t
- * @pfn: hmm_pfn_t to convert to struct page
- * Returns: struct page pointer if pfn is a valid hmm_pfn_t, NULL otherwise
+ * hmm_pfn_to_page() - return struct page pointed to by a valid HMM pfn
+ * @range: range use to decode HMM pfn value
+ * @pfn: HMM pfn value to get corresponding struct page from
+ * Returns: struct page pointer if pfn is a valid HMM pfn, NULL otherwise
  *
- * If the hmm_pfn_t is valid (ie valid flag set) then return the struct page
- * matching the pfn value stored in the hmm_pfn_t. Otherwise return NULL.
+ * If the HMM pfn is valid (ie valid flag set) then return the struct page
+ * matching the pfn value stored in the HMM pfn. Otherwise return NULL.
  */
-static inline struct page *hmm_pfn_t_to_page(hmm_pfn_t pfn)
+static inline struct page *hmm_pfn_to_page(const struct hmm_range *range,
+                                          uint64_t pfn)
 {
-       if (!(pfn & HMM_PFN_VALID))
+       if (pfn == range->values[HMM_PFN_NONE])
+               return NULL;
+       if (pfn == range->values[HMM_PFN_ERROR])
                return NULL;
-       return pfn_to_page(pfn >> HMM_PFN_SHIFT);
+       if (pfn == range->values[HMM_PFN_SPECIAL])
+               return NULL;
+       if (!(pfn & range->flags[HMM_PFN_VALID]))
+               return NULL;
+       return pfn_to_page(pfn >> range->pfn_shift);
 }
 
 /*
- * hmm_pfn_t_to_pfn() - return pfn value store in a hmm_pfn_t
- * @pfn: hmm_pfn_t to extract pfn from
- * Returns: pfn value if hmm_pfn_t is valid, -1UL otherwise
+ * hmm_pfn_to_pfn() - return pfn value store in a HMM pfn
+ * @range: range use to decode HMM pfn value
+ * @pfn: HMM pfn value to extract pfn from
+ * Returns: pfn value if HMM pfn is valid, -1UL otherwise
  */
-static inline unsigned long hmm_pfn_t_to_pfn(hmm_pfn_t pfn)
+static inline unsigned long hmm_pfn_to_pfn(const struct hmm_range *range,
+                                          uint64_t pfn)
 {
-       if (!(pfn & HMM_PFN_VALID))
+       if (pfn == range->values[HMM_PFN_NONE])
+               return -1UL;
+       if (pfn == range->values[HMM_PFN_ERROR])
+               return -1UL;
+       if (pfn == range->values[HMM_PFN_SPECIAL])
+               return -1UL;
+       if (!(pfn & range->flags[HMM_PFN_VALID]))
                return -1UL;
-       return (pfn >> HMM_PFN_SHIFT);
+       return (pfn >> range->pfn_shift);
 }
 
 /*
- * hmm_pfn_t_from_page() - create a valid hmm_pfn_t value from struct page
- * @page: struct page pointer for which to create the hmm_pfn_t
- * Returns: valid hmm_pfn_t for the page
+ * hmm_pfn_from_page() - create a valid HMM pfn value from struct page
+ * @range: range use to encode HMM pfn value
+ * @page: struct page pointer for which to create the HMM pfn
+ * Returns: valid HMM pfn for the page
  */
-static inline hmm_pfn_t hmm_pfn_t_from_page(struct page *page)
+static inline uint64_t hmm_pfn_from_page(const struct hmm_range *range,
+                                        struct page *page)
 {
-       return (page_to_pfn(page) << HMM_PFN_SHIFT) | HMM_PFN_VALID;
+       return (page_to_pfn(page) << range->pfn_shift) |
+               range->flags[HMM_PFN_VALID];
 }
 
 /*
- * hmm_pfn_t_from_pfn() - create a valid hmm_pfn_t value from pfn
- * @pfn: pfn value for which to create the hmm_pfn_t
- * Returns: valid hmm_pfn_t for the pfn
+ * hmm_pfn_from_pfn() - create a valid HMM pfn value from pfn
+ * @range: range use to encode HMM pfn value
+ * @pfn: pfn value for which to create the HMM pfn
+ * Returns: valid HMM pfn for the pfn
  */
-static inline hmm_pfn_t hmm_pfn_t_from_pfn(unsigned long pfn)
+static inline uint64_t hmm_pfn_from_pfn(const struct hmm_range *range,
+                                       unsigned long pfn)
 {
-       return (pfn << HMM_PFN_SHIFT) | HMM_PFN_VALID;
+       return (pfn << range->pfn_shift) |
+               range->flags[HMM_PFN_VALID];
 }
 
 
@@ -218,6 +287,16 @@ enum hmm_update_type {
  * @update: callback to update range on a device
  */
 struct hmm_mirror_ops {
+       /* release() - release hmm_mirror
+        *
+        * @mirror: pointer to struct hmm_mirror
+        *
+        * This is called when the mm_struct is being released.
+        * The callback should make sure no references to the mirror occur
+        * after the callback returns.
+        */
+       void (*release)(struct hmm_mirror *mirror);
+
        /* sync_cpu_device_pagetables() - synchronize page tables
         *
         * @mirror: pointer to struct hmm_mirror
@@ -261,23 +340,6 @@ int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm);
 void hmm_mirror_unregister(struct hmm_mirror *mirror);
 
 
-/*
- * struct hmm_range - track invalidation lock on virtual address range
- *
- * @list: all range lock are on a list
- * @start: range virtual start address (inclusive)
- * @end: range virtual end address (exclusive)
- * @pfns: array of pfns (big enough for the range)
- * @valid: pfns array did not change since it has been fill by an HMM function
- */
-struct hmm_range {
-       struct list_head        list;
-       unsigned long           start;
-       unsigned long           end;
-       hmm_pfn_t               *pfns;
-       bool                    valid;
-};
-
 /*
  * To snapshot the CPU page table, call hmm_vma_get_pfns(), then take a device
  * driver lock that serializes device page table updates, then call
@@ -291,17 +353,13 @@ struct hmm_range {
  *
  * IF YOU DO NOT FOLLOW THE ABOVE RULE THE SNAPSHOT CONTENT MIGHT BE INVALID !
  */
-int hmm_vma_get_pfns(struct vm_area_struct *vma,
-                    struct hmm_range *range,
-                    unsigned long start,
-                    unsigned long end,
-                    hmm_pfn_t *pfns);
-bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range);
+int hmm_vma_get_pfns(struct hmm_range *range);
+bool hmm_vma_range_done(struct hmm_range *range);
 
 
 /*
  * Fault memory on behalf of device driver. Unlike handle_mm_fault(), this will
- * not migrate any device memory back to system memory. The hmm_pfn_t array will
+ * not migrate any device memory back to system memory. The HMM pfn array will
  * be updated with the fault result and current snapshot of the CPU page table
  * for the range.
  *
@@ -310,22 +368,26 @@ bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range);
  * function returns -EAGAIN.
  *
  * Return value does not reflect if the fault was successful for every single
- * address or not. Therefore, the caller must to inspect the hmm_pfn_t array to
+ * address or not. Therefore, the caller must to inspect the HMM pfn array to
  * determine fault status for each address.
  *
  * Trying to fault inside an invalid vma will result in -EINVAL.
  *
  * See the function description in mm/hmm.c for further documentation.
  */
-int hmm_vma_fault(struct vm_area_struct *vma,
-                 struct hmm_range *range,
-                 unsigned long start,
-                 unsigned long end,
-                 hmm_pfn_t *pfns,
-                 bool write,
-                 bool block);
-#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
+int hmm_vma_fault(struct hmm_range *range, bool block);
 
+/* Below are for HMM internal use only! Not to be used by device driver! */
+void hmm_mm_destroy(struct mm_struct *mm);
+
+static inline void hmm_mm_init(struct mm_struct *mm)
+{
+       mm->hmm = NULL;
+}
+#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */
+static inline void hmm_mm_destroy(struct mm_struct *mm) {}
+static inline void hmm_mm_init(struct mm_struct *mm) {}
+#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
 
 #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
 struct hmm_devmem;
@@ -498,23 +560,9 @@ struct hmm_device {
 struct hmm_device *hmm_device_new(void *drvdata);
 void hmm_device_put(struct hmm_device *hmm_device);
 #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-#endif /* IS_ENABLED(CONFIG_HMM) */
-
-/* Below are for HMM internal use only! Not to be used by device driver! */
-#if IS_ENABLED(CONFIG_HMM_MIRROR)
-void hmm_mm_destroy(struct mm_struct *mm);
-
-static inline void hmm_mm_init(struct mm_struct *mm)
-{
-       mm->hmm = NULL;
-}
-#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */
-static inline void hmm_mm_destroy(struct mm_struct *mm) {}
-static inline void hmm_mm_init(struct mm_struct *mm) {}
-#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
-
-
 #else /* IS_ENABLED(CONFIG_HMM) */
 static inline void hmm_mm_destroy(struct mm_struct *mm) {}
 static inline void hmm_mm_init(struct mm_struct *mm) {}
+#endif /* IS_ENABLED(CONFIG_HMM) */
+
 #endif /* LINUX_HMM_H */
index 78f456fcd2425241c8a5a7ca5f8e97ffc86016d9..a2656c3ebe81cafaff88dc2329897b4a22d89edd 100644 (file)
@@ -424,6 +424,7 @@ static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
 }
 
 extern u64 hrtimer_get_next_event(void);
+extern u64 hrtimer_next_event_without(const struct hrtimer *exclude);
 
 extern bool hrtimer_active(const struct hrtimer *timer);
 
index 7d6a6313f0aba5d0505ac6bfed5da19273745252..e856f4e0ab359d085d33266ca4505ea941df01af 100644 (file)
@@ -29,29 +29,31 @@ struct idr {
 #define IDR_FREE       0
 
 /* Set the IDR flag and the IDR_FREE tag */
-#define IDR_RT_MARKER          ((__force gfp_t)(3 << __GFP_BITS_SHIFT))
+#define IDR_RT_MARKER  (ROOT_IS_IDR | (__force gfp_t)                  \
+                                       (1 << (ROOT_TAG_SHIFT + IDR_FREE)))
 
-#define IDR_INIT_BASE(base) {                                          \
-       .idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER),                       \
+#define IDR_INIT_BASE(name, base) {                                    \
+       .idr_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER),                 \
        .idr_base = (base),                                             \
        .idr_next = 0,                                                  \
 }
 
 /**
  * IDR_INIT() - Initialise an IDR.
+ * @name: Name of IDR.
  *
  * A freshly-initialised IDR contains no IDs.
  */
-#define IDR_INIT       IDR_INIT_BASE(0)
+#define IDR_INIT(name) IDR_INIT_BASE(name, 0)
 
 /**
- * DEFINE_IDR() - Define a statically-allocated IDR
- * @name: Name of IDR
+ * DEFINE_IDR() - Define a statically-allocated IDR.
+ * @name: Name of IDR.
  *
  * An IDR defined using this macro is ready for use with no additional
  * initialisation required.  It contains no IDs.
  */
-#define DEFINE_IDR(name)       struct idr name = IDR_INIT
+#define DEFINE_IDR(name)       struct idr name = IDR_INIT(name)
 
 /**
  * idr_get_cursor - Return the current position of the cyclic allocator
@@ -218,10 +220,10 @@ struct ida {
        struct radix_tree_root  ida_rt;
 };
 
-#define IDA_INIT       {                                               \
-       .ida_rt = RADIX_TREE_INIT(IDR_RT_MARKER | GFP_NOWAIT),          \
+#define IDA_INIT(name) {                                               \
+       .ida_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER | GFP_NOWAIT),    \
 }
-#define DEFINE_IDA(name)       struct ida name = IDA_INIT
+#define DEFINE_IDA(name)       struct ida name = IDA_INIT(name)
 
 int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
 int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
index 8dad3dd26eaeddc60aea0ce70547a2964364befd..ef169d67df9217a8bf9d1dad19be7920ae0352f2 100644 (file)
 #define DMA_FECTL_IM (((u32)1) << 31)
 
 /* FSTS_REG */
-#define DMA_FSTS_PPF ((u32)2)
-#define DMA_FSTS_PFO ((u32)1)
-#define DMA_FSTS_IQE (1 << 4)
-#define DMA_FSTS_ICE (1 << 5)
-#define DMA_FSTS_ITE (1 << 6)
-#define DMA_FSTS_PRO (1 << 7)
+#define DMA_FSTS_PFO (1 << 0) /* Primary Fault Overflow */
+#define DMA_FSTS_PPF (1 << 1) /* Primary Pending Fault */
+#define DMA_FSTS_IQE (1 << 4) /* Invalidation Queue Error */
+#define DMA_FSTS_ICE (1 << 5) /* Invalidation Completion Error */
+#define DMA_FSTS_ITE (1 << 6) /* Invalidation Time-out Error */
+#define DMA_FSTS_PRO (1 << 7) /* Page Request Overflow */
 #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
 
 /* FRCD_REG, 32 bits access */
index 41b8c575785916f7691a7686a5c5d4e7d99003fa..19938ee6eb31638d487a71c6823c4a7e6ceefef2 100644 (file)
@@ -465,23 +465,23 @@ static inline int iommu_map(struct iommu_domain *domain, unsigned long iova,
        return -ENODEV;
 }
 
-static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova,
-                             size_t size)
+static inline size_t iommu_unmap(struct iommu_domain *domain,
+                                unsigned long iova, size_t size)
 {
-       return -ENODEV;
+       return 0;
 }
 
-static inline int iommu_unmap_fast(struct iommu_domain *domain, unsigned long iova,
-                                  int gfp_order)
+static inline size_t iommu_unmap_fast(struct iommu_domain *domain,
+                                     unsigned long iova, int gfp_order)
 {
-       return -ENODEV;
+       return 0;
 }
 
 static inline size_t iommu_map_sg(struct iommu_domain *domain,
                                  unsigned long iova, struct scatterlist *sg,
                                  unsigned int nents, int prot)
 {
-       return -ENODEV;
+       return 0;
 }
 
 static inline void iommu_flush_tlb_all(struct iommu_domain *domain)
index 9385aa57497b80851cbe7bfd6adc83c60f03034c..a27cf66523279c1a5d4aaa0d0087f1e9d48d170f 100644 (file)
@@ -62,8 +62,11 @@ extern int register_refined_jiffies(long clock_tick_rate);
 /* TICK_NSEC is the time between ticks in nsec assuming SHIFTED_HZ */
 #define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ)
 
-/* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
-#define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
+/* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */
+#define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ)
+
+/* USER_TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
+#define USER_TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
 
 #ifndef __jiffy_arch_data
 #define __jiffy_arch_data
index 52b70894eaa53fe9d827481e1f96104fa77c1654..6a1eb0b0aad96f2e8f890efb71ad5013a7241f20 100644 (file)
@@ -439,7 +439,8 @@ extern long simple_strtol(const char *,char **,unsigned int);
 extern unsigned long long simple_strtoull(const char *,char **,unsigned int);
 extern long long simple_strtoll(const char *,char **,unsigned int);
 
-extern int num_to_str(char *buf, int size, unsigned long long num);
+extern int num_to_str(char *buf, int size,
+                     unsigned long long num, unsigned int width);
 
 /* lib/printf utilities */
 
@@ -543,6 +544,7 @@ extern enum system_states {
        SYSTEM_RESTART,
 } system_state;
 
+/* This cannot be an enum because some may be used in assembly source. */
 #define TAINT_PROPRIETARY_MODULE       0
 #define TAINT_FORCED_MODULE            1
 #define TAINT_CPU_OUT_OF_SPEC          2
@@ -560,7 +562,8 @@ extern enum system_states {
 #define TAINT_SOFTLOCKUP               14
 #define TAINT_LIVEPATCH                        15
 #define TAINT_AUX                      16
-#define TAINT_FLAGS_COUNT              17
+#define TAINT_RANDSTRUCT               17
+#define TAINT_FLAGS_COUNT              18
 
 struct taint_flag {
        char c_true;    /* character printed when tainted */
index e251533a59396c22ccdde16bd8a068dc4ede7b66..89fc8dc7bf38d0a48cf0c921112da303053787ce 100644 (file)
  */
 
 /*
- * Note about locking : There is no locking required until only * one reader
- * and one writer is using the fifo and no kfifo_reset() will be * called
- *  kfifo_reset_out() can be safely used, until it will be only called
+ * Note about locking: There is no locking required until only one reader
+ * and one writer is using the fifo and no kfifo_reset() will be called.
+ * kfifo_reset_out() can be safely used, until it will be only called
  * in the reader thread.
- *  For multiple writer and one reader there is only a need to lock the writer.
+ * For multiple writer and one reader there is only a need to lock the writer.
  * And vice versa for only one writer and multiple reader there is only a need
  * to lock the reader.
  */
index ff855ed965fb9d0100c0964f82dd9c4a91a9e32a..097072c5a852f22e7aab76750b4bb4de709a7bc3 100644 (file)
@@ -76,12 +76,14 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
                struct nvdimm *nvdimm, unsigned int cmd, void *buf,
                unsigned int buf_len, int *cmd_rc);
 
+struct device_node;
 struct nvdimm_bus_descriptor {
        const struct attribute_group **attr_groups;
        unsigned long bus_dsm_mask;
        unsigned long cmd_mask;
        struct module *module;
        char *provider_name;
+       struct device_node *of_node;
        ndctl_fn ndctl;
        int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc);
        int (*clear_to_send)(struct nvdimm_bus_descriptor *nd_desc,
@@ -123,6 +125,7 @@ struct nd_region_desc {
        int num_lanes;
        int numa_node;
        unsigned long flags;
+       struct device_node *of_node;
 };
 
 struct device;
@@ -164,6 +167,7 @@ void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
 struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
 struct nvdimm *to_nvdimm(struct device *dev);
 struct nd_region *to_nd_region(struct device *dev);
+struct device *nd_region_dev(struct nd_region *nd_region);
 struct nd_blk_region *to_nd_blk_region(struct device *dev);
 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
 struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus);
index c46016bb25ebe2ba3919987876ae058dd0a7b01c..d99b71bc2c667a512541d33894ec073ff1bb6c91 100644 (file)
@@ -48,13 +48,12 @@ enum memcg_stat_item {
        MEMCG_NR_STAT,
 };
 
-/* Cgroup-specific events, on top of universal VM events */
-enum memcg_event_item {
-       MEMCG_LOW = NR_VM_EVENT_ITEMS,
+enum memcg_memory_event {
+       MEMCG_LOW,
        MEMCG_HIGH,
        MEMCG_MAX,
        MEMCG_OOM,
-       MEMCG_NR_EVENTS,
+       MEMCG_NR_MEMORY_EVENTS,
 };
 
 struct mem_cgroup_reclaim_cookie {
@@ -88,7 +87,7 @@ enum mem_cgroup_events_target {
 
 struct mem_cgroup_stat_cpu {
        long count[MEMCG_NR_STAT];
-       unsigned long events[MEMCG_NR_EVENTS];
+       unsigned long events[NR_VM_EVENT_ITEMS];
        unsigned long nr_page_events;
        unsigned long targets[MEM_CGROUP_NTARGETS];
 };
@@ -120,6 +119,9 @@ struct mem_cgroup_per_node {
        unsigned long           usage_in_excess;/* Set to the value by which */
                                                /* the soft limit is exceeded*/
        bool                    on_tree;
+       bool                    congested;      /* memcg has many dirty pages */
+                                               /* backed by a congested BDI */
+
        struct mem_cgroup       *memcg;         /* Back pointer, we cannot */
                                                /* use container_of        */
 };
@@ -202,7 +204,8 @@ struct mem_cgroup {
        /* OOM-Killer disable */
        int             oom_kill_disable;
 
-       /* handle for "memory.events" */
+       /* memory.events */
+       atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
        struct cgroup_file events_file;
 
        /* protect arrays of thresholds */
@@ -231,9 +234,10 @@ struct mem_cgroup {
        struct task_struct      *move_lock_task;
        unsigned long           move_lock_flags;
 
+       /* memory.stat */
        struct mem_cgroup_stat_cpu __percpu *stat_cpu;
        atomic_long_t           stat[MEMCG_NR_STAT];
-       atomic_long_t           events[MEMCG_NR_EVENTS];
+       atomic_long_t           events[NR_VM_EVENT_ITEMS];
 
        unsigned long           socket_pressure;
 
@@ -645,9 +649,9 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
                                                gfp_t gfp_mask,
                                                unsigned long *total_scanned);
 
-/* idx can be of type enum memcg_event_item or vm_event_item */
 static inline void __count_memcg_events(struct mem_cgroup *memcg,
-                                       int idx, unsigned long count)
+                                       enum vm_event_item idx,
+                                       unsigned long count)
 {
        unsigned long x;
 
@@ -663,7 +667,8 @@ static inline void __count_memcg_events(struct mem_cgroup *memcg,
 }
 
 static inline void count_memcg_events(struct mem_cgroup *memcg,
-                                     int idx, unsigned long count)
+                                     enum vm_event_item idx,
+                                     unsigned long count)
 {
        unsigned long flags;
 
@@ -672,9 +677,8 @@ static inline void count_memcg_events(struct mem_cgroup *memcg,
        local_irq_restore(flags);
 }
 
-/* idx can be of type enum memcg_event_item or vm_event_item */
 static inline void count_memcg_page_event(struct page *page,
-                                         int idx)
+                                         enum vm_event_item idx)
 {
        if (page->mem_cgroup)
                count_memcg_events(page->mem_cgroup, idx, 1);
@@ -698,10 +702,10 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
        rcu_read_unlock();
 }
 
-static inline void mem_cgroup_event(struct mem_cgroup *memcg,
-                                   enum memcg_event_item event)
+static inline void memcg_memory_event(struct mem_cgroup *memcg,
+                                     enum memcg_memory_event event)
 {
-       count_memcg_events(memcg, event, 1);
+       atomic_long_inc(&memcg->memory_events[event]);
        cgroup_file_notify(&memcg->events_file);
 }
 
@@ -721,8 +725,8 @@ static inline bool mem_cgroup_disabled(void)
        return true;
 }
 
-static inline void mem_cgroup_event(struct mem_cgroup *memcg,
-                                   enum memcg_event_item event)
+static inline void memcg_memory_event(struct mem_cgroup *memcg,
+                                     enum memcg_memory_event event)
 {
 }
 
index 2b0265265c286b9fffb53e290039bf0686046908..e0e49b5b1ee138344185e6e1ae5796fc4ecf4fdf 100644 (file)
@@ -216,9 +216,6 @@ void put_online_mems(void);
 void mem_hotplug_begin(void);
 void mem_hotplug_done(void);
 
-extern void set_zone_contiguous(struct zone *zone);
-extern void clear_zone_contiguous(struct zone *zone);
-
 #else /* ! CONFIG_MEMORY_HOTPLUG */
 #define pfn_to_online_page(pfn)                        \
 ({                                             \
index 48c3c5be7eb1ffc90b5860b189b1d964b840dcd0..9ed2871ea335751a2aacf8be178ae3b76d59971a 100644 (file)
@@ -141,15 +141,4 @@ enum s2mps_rtc_reg {
 #define WTSR_ENABLE_SHIFT      6
 #define WTSR_ENABLE_MASK       (1 << WTSR_ENABLE_SHIFT)
 
-enum {
-       RTC_SEC = 0,
-       RTC_MIN,
-       RTC_HOUR,
-       RTC_WEEKDAY,
-       RTC_DATE,
-       RTC_MONTH,
-       RTC_YEAR1,
-       RTC_YEAR2,
-};
-
 #endif /*  __LINUX_MFD_SEC_RTC_H */
index ab45f8a0d288ab03665e2d88a075c0479c76318d..f2b4abbca55e75f82c55dd6b8f7771d54738a23b 100644 (file)
@@ -7,8 +7,7 @@
 #include <linux/migrate_mode.h>
 #include <linux/hugetlb.h>
 
-typedef struct page *new_page_t(struct page *page, unsigned long private,
-                               int **reason);
+typedef struct page *new_page_t(struct page *page, unsigned long private);
 typedef void free_page_t(struct page *page, unsigned long private);
 
 /*
@@ -43,9 +42,9 @@ static inline struct page *new_page_nodemask(struct page *page,
                return alloc_huge_page_nodemask(page_hstate(compound_head(page)),
                                preferred_nid, nodemask);
 
-       if (thp_migration_supported() && PageTransHuge(page)) {
-               order = HPAGE_PMD_ORDER;
+       if (PageTransHuge(page)) {
                gfp_mask |= GFP_TRANSHUGE;
+               order = HPAGE_PMD_ORDER;
        }
 
        if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
index 3ad6323669732dc94bc9a67a603f97989d5bdfd7..1ac1f06a4be6b22faf3883c760515a042a6d347e 100644 (file)
@@ -747,7 +747,7 @@ int finish_mkwrite_fault(struct vm_fault *vmf);
  * refcount. The each user mapping also has a reference to the page.
  *
  * The pagecache pages are stored in a per-mapping radix tree, which is
- * rooted at mapping->page_tree, and indexed by offset.
+ * rooted at mapping->i_pages, and indexed by offset.
  * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space
  * lists, we instead now tag pages as dirty/writeback in the radix tree.
  *
@@ -1466,6 +1466,7 @@ extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
 extern void do_invalidatepage(struct page *page, unsigned int offset,
                              unsigned int length);
 
+void __set_page_dirty(struct page *, struct address_space *, int warn);
 int __set_page_dirty_nobuffers(struct page *page);
 int __set_page_dirty_no_writeback(struct page *page);
 int redirty_page_for_writepage(struct writeback_control *wbc,
@@ -2108,6 +2109,7 @@ extern void setup_per_cpu_pageset(void);
 
 extern void zone_pcp_update(struct zone *zone);
 extern void zone_pcp_reset(struct zone *zone);
+extern void setup_zone_pageset(struct zone *zone);
 
 /* page_alloc.c */
 extern int min_free_kbytes;
index f11ae29005f1b44cfda0958235043a4e51dbd830..32699b2dc52a1f8bece62d3ae7d35d67cbe1e88f 100644 (file)
@@ -180,6 +180,7 @@ enum node_stat_item {
        NR_VMSCAN_IMMEDIATE,    /* Prioritise for reclaim when writeback ends */
        NR_DIRTIED,             /* page dirtyings since bootup */
        NR_WRITTEN,             /* page writings since bootup */
+       NR_INDIRECTLY_RECLAIMABLE_BYTES, /* measured in bytes */
        NR_VM_NODE_STAT_ITEMS
 };
 
@@ -884,7 +885,7 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
 int watermark_scale_factor_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
-extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
+extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
 int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
 int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
index 5dc6b695437da044352fc6606f0a4ccaee29d6db..43c181a6add582df82fad2a0161e93b05883392e 100644 (file)
@@ -180,6 +180,12 @@ struct nd_region;
 void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event);
 int __must_check __nd_driver_register(struct nd_device_driver *nd_drv,
                struct module *module, const char *mod_name);
+static inline void nd_driver_unregister(struct nd_device_driver *drv)
+{
+       driver_unregister(&drv->drv);
+}
 #define nd_driver_register(driver) \
        __nd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
+#define module_nd_driver(driver) \
+       module_driver(driver, nd_driver_register, nd_driver_unregister)
 #endif /* __LINUX_ND_H__ */
index cdad58bbfd8bffc2ccb0ecc42e29b10459a4e9d7..4ae347cbc36d16fcaed260ee9f39ad63259c7ed7 100644 (file)
@@ -63,7 +63,6 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
                        bool skip_hwpoisoned_pages);
 
-struct page *alloc_migrate_target(struct page *page, unsigned long private,
-                               int **resultp);
+struct page *alloc_migrate_target(struct page *page, unsigned long private);
 
 #endif
index 34ce3ebf97d5eaf3914109144b15d71051226e25..b1bd2186e6d2bdc9428580184f4baa0ae51ec287 100644 (file)
@@ -144,7 +144,7 @@ void release_pages(struct page **pages, int nr);
  * 3. check the page is still in pagecache (if no, goto 1)
  *
  * Remove-side that cares about stability of _refcount (eg. reclaim) has the
- * following (with tree_lock held for write):
+ * following (with the i_pages lock held):
  * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
  * B. remove page from pagecache
  * C. free the page
@@ -157,7 +157,7 @@ void release_pages(struct page **pages, int nr);
  *
  * It is possible that between 1 and 2, the page is removed then the exact same
  * page is inserted into the same position in pagecache. That's OK: the
- * old find_get_page using tree_lock could equally have run before or after
+ * old find_get_page using lock could equally have run before or after
  * such a re-insertion, depending on order that locks are granted.
  *
  * Lookups racing against pagecache insertion isn't a big problem: either 1
index fcdc707eab9975f0a962038b8cdb1525f6614bf1..2744cff1b297e198041e6982700d2dc9fb9d8c58 100644 (file)
@@ -129,6 +129,8 @@ struct mlxreg_core_platform_data {
  * @mask: top aggregation interrupt common mask;
  * @cell_low: location of low aggregation interrupt register;
  * @mask_low: low aggregation interrupt common mask;
+ * @deferred_nr: I2C adapter number must be exist prior probing execution;
+ * @shift_nr: I2C adapter numbers must be incremented by this value;
  */
 struct mlxreg_core_hotplug_platform_data {
        struct mlxreg_core_item *items;
@@ -139,6 +141,8 @@ struct mlxreg_core_hotplug_platform_data {
        u32 mask;
        u32 cell_low;
        u32 mask_low;
+       int deferred_nr;
+       int shift_nr;
 };
 
 #endif /* __LINUX_PLATFORM_DATA_MLXREG_H */
index fc55ff31eca7040208ca161dd0196451c78fe33a..34149e8b5f73fe6cd821ba58c5196974d0adee2e 100644 (file)
@@ -104,25 +104,29 @@ struct radix_tree_node {
        unsigned long   tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
 };
 
-/* The top bits of gfp_mask are used to store the root tags and the IDR flag */
-#define ROOT_IS_IDR    ((__force gfp_t)(1 << __GFP_BITS_SHIFT))
-#define ROOT_TAG_SHIFT (__GFP_BITS_SHIFT + 1)
+/* The IDR tag is stored in the low bits of the GFP flags */
+#define ROOT_IS_IDR    ((__force gfp_t)4)
+/* The top bits of gfp_mask are used to store the root tags */
+#define ROOT_TAG_SHIFT (__GFP_BITS_SHIFT)
 
 struct radix_tree_root {
+       spinlock_t              xa_lock;
        gfp_t                   gfp_mask;
        struct radix_tree_node  __rcu *rnode;
 };
 
-#define RADIX_TREE_INIT(mask)  {                                       \
+#define RADIX_TREE_INIT(name, mask)    {                               \
+       .xa_lock = __SPIN_LOCK_UNLOCKED(name.xa_lock),                  \
        .gfp_mask = (mask),                                             \
        .rnode = NULL,                                                  \
 }
 
 #define RADIX_TREE(name, mask) \
-       struct radix_tree_root name = RADIX_TREE_INIT(mask)
+       struct radix_tree_root name = RADIX_TREE_INIT(name, mask)
 
 #define INIT_RADIX_TREE(root, mask)                                    \
 do {                                                                   \
+       spin_lock_init(&(root)->xa_lock);                               \
        (root)->gfp_mask = (mask);                                      \
        (root)->rnode = NULL;                                           \
 } while (0)
index 728d421fffe983ff877b5ba279d21d7b3857352a..d09a9c7af109fbb27324d9f4f7f83c60843c9110 100644 (file)
@@ -344,7 +344,7 @@ struct rproc_ops {
        int (*stop)(struct rproc *rproc);
        void (*kick)(struct rproc *rproc, int vqid);
        void * (*da_to_va)(struct rproc *rproc, u64 da, int len);
-       int (*load_rsc_table)(struct rproc *rproc, const struct firmware *fw);
+       int (*parse_fw)(struct rproc *rproc, const struct firmware *fw);
        struct resource_table *(*find_loaded_rsc_table)(
                                struct rproc *rproc, const struct firmware *fw);
        int (*load)(struct rproc *rproc, const struct firmware *fw);
@@ -394,6 +394,21 @@ enum rproc_crash_type {
        RPROC_FATAL_ERROR,
 };
 
+/**
+ * struct rproc_dump_segment - segment info from ELF header
+ * @node:      list node related to the rproc segment list
+ * @da:                device address of the segment
+ * @size:      size of the segment
+ */
+struct rproc_dump_segment {
+       struct list_head node;
+
+       dma_addr_t da;
+       size_t size;
+
+       loff_t offset;
+};
+
 /**
  * struct rproc - represents a physical remote processor device
  * @node: list node of this rproc object
@@ -424,6 +439,7 @@ enum rproc_crash_type {
  * @cached_table: copy of the resource table
  * @table_sz: size of @cached_table
  * @has_iommu: flag to indicate if remote processor is behind an MMU
+ * @dump_segments: list of segments in the firmware
  */
 struct rproc {
        struct list_head node;
@@ -455,19 +471,21 @@ struct rproc {
        size_t table_sz;
        bool has_iommu;
        bool auto_boot;
+       struct list_head dump_segments;
 };
 
 /**
  * struct rproc_subdev - subdevice tied to a remoteproc
  * @node: list node related to the rproc subdevs list
  * @probe: probe function, called as the rproc is started
- * @remove: remove function, called as the rproc is stopped
+ * @remove: remove function, called as the rproc is being stopped, the @crashed
+ *         parameter indicates if this originates from the a recovery
  */
 struct rproc_subdev {
        struct list_head node;
 
        int (*probe)(struct rproc_subdev *subdev);
-       void (*remove)(struct rproc_subdev *subdev);
+       void (*remove)(struct rproc_subdev *subdev, bool crashed);
 };
 
 /* we currently support only two vrings per rvdev */
@@ -534,6 +552,7 @@ void rproc_free(struct rproc *rproc);
 int rproc_boot(struct rproc *rproc);
 void rproc_shutdown(struct rproc *rproc);
 void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type);
+int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size);
 
 static inline struct rproc_vdev *vdev_to_rvdev(struct virtio_device *vdev)
 {
@@ -550,7 +569,7 @@ static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev)
 void rproc_add_subdev(struct rproc *rproc,
                      struct rproc_subdev *subdev,
                      int (*probe)(struct rproc_subdev *subdev),
-                     void (*remove)(struct rproc_subdev *subdev));
+                     void (*remove)(struct rproc_subdev *subdev, bool graceful));
 
 void rproc_remove_subdev(struct rproc *rproc, struct rproc_subdev *subdev);
 
index 7d9eb39fa76a89b89eb34a17d6b61b90294bfaa9..a0233edc0718e66728d54060c78a0bf52b366e3e 100644 (file)
@@ -34,10 +34,12 @@ struct ring_buffer_event {
  *                              array[0] = time delta (28 .. 59)
  *                              size = 8 bytes
  *
- * @RINGBUF_TYPE_TIME_STAMP:   Sync time stamp with external clock
- *                              array[0]    = tv_nsec
- *                              array[1..2] = tv_sec
- *                              size = 16 bytes
+ * @RINGBUF_TYPE_TIME_STAMP:   Absolute timestamp
+ *                              Same format as TIME_EXTEND except that the
+ *                              value is an absolute timestamp, not a delta
+ *                              event.time_delta contains bottom 27 bits
+ *                              array[0] = top (28 .. 59) bits
+ *                              size = 8 bytes
  *
  * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX:
  *                             Data record
@@ -54,12 +56,12 @@ enum ring_buffer_type {
        RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28,
        RINGBUF_TYPE_PADDING,
        RINGBUF_TYPE_TIME_EXTEND,
-       /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
        RINGBUF_TYPE_TIME_STAMP,
 };
 
 unsigned ring_buffer_event_length(struct ring_buffer_event *event);
 void *ring_buffer_event_data(struct ring_buffer_event *event);
+u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event);
 
 /*
  * ring_buffer_discard_commit will remove an event that has not
@@ -115,6 +117,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 int ring_buffer_write(struct ring_buffer *buffer,
                      unsigned long length, void *data);
 
+void ring_buffer_nest_start(struct ring_buffer *buffer);
+void ring_buffer_nest_end(struct ring_buffer *buffer);
+
 struct ring_buffer_event *
 ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
                 unsigned long *lost_events);
@@ -178,6 +183,8 @@ void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
                                      int cpu, u64 *ts);
 void ring_buffer_set_clock(struct ring_buffer *buffer,
                           u64 (*clock)(void));
+void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs);
+bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer);
 
 size_t ring_buffer_page_len(void *page);
 
index fc6c90b57be0afd9f5441425378747635c430a61..4c007f69082f477e8e6a03669e84d61d5f694e35 100644 (file)
@@ -145,12 +145,17 @@ struct rtc_device {
 
        bool registered;
 
-       struct nvmem_config *nvmem_config;
        struct nvmem_device *nvmem;
        /* Old ABI support */
        bool nvram_old_abi;
        struct bin_attribute *nvram;
 
+       time64_t range_min;
+       timeu64_t range_max;
+       time64_t start_secs;
+       time64_t offset_secs;
+       bool set_start_time;
+
 #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
        struct work_struct uie_task;
        struct timer_list uie_timer;
@@ -164,6 +169,11 @@ struct rtc_device {
 };
 #define to_rtc_device(d) container_of(d, struct rtc_device, dev)
 
+/* useful timestamps */
+#define RTC_TIMESTAMP_BEGIN_1900       -2208989361LL /* 1900-01-01 00:00:00 */
+#define RTC_TIMESTAMP_BEGIN_2000       946684800LL /* 2000-01-01 00:00:00 */
+#define RTC_TIMESTAMP_END_2099         4102444799LL /* 2099-12-31 23:59:59 */
+
 extern struct rtc_device *rtc_device_register(const char *name,
                                        struct device *dev,
                                        const struct rtc_class_ops *ops,
@@ -212,10 +222,6 @@ void rtc_aie_update_irq(void *private);
 void rtc_uie_update_irq(void *private);
 enum hrtimer_restart rtc_pie_update_irq(struct hrtimer *timer);
 
-int rtc_register(rtc_task_t *task);
-int rtc_unregister(rtc_task_t *task);
-int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg);
-
 void rtc_timer_init(struct rtc_timer *timer, void (*f)(void *p), void *data);
 int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer *timer,
                    ktime_t expires, ktime_t period);
@@ -271,4 +277,17 @@ extern int rtc_hctosys_ret;
 #define rtc_hctosys_ret -ENODEV
 #endif
 
+#ifdef CONFIG_RTC_NVMEM
+int rtc_nvmem_register(struct rtc_device *rtc,
+                      struct nvmem_config *nvmem_config);
+void rtc_nvmem_unregister(struct rtc_device *rtc);
+#else
+static inline int rtc_nvmem_register(struct rtc_device *rtc,
+                                    struct nvmem_config *nvmem_config)
+{
+       return -ENODEV;
+}
+static inline void rtc_nvmem_unregister(struct rtc_device *rtc) {}
+#endif
+
 #endif /* _LINUX_RTC_H_ */
index 9806184bb3d54eb5160db40f747574868837e787..2c570cd934af54c14dcddbf971271c087571887d 100644 (file)
@@ -104,7 +104,8 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
 #endif /* CONFIG_MEMCG */
 
 #ifdef CONFIG_MMU
-extern void arch_pick_mmap_layout(struct mm_struct *mm);
+extern void arch_pick_mmap_layout(struct mm_struct *mm,
+                                 struct rlimit *rlim_stack);
 extern unsigned long
 arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
                       unsigned long, unsigned long);
@@ -113,7 +114,8 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
                          unsigned long len, unsigned long pgoff,
                          unsigned long flags);
 #else
-static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
+static inline void arch_pick_mmap_layout(struct mm_struct *mm,
+                                        struct rlimit *rlim_stack) {}
 #endif
 
 static inline bool in_vfork(struct task_struct *tsk)
index ab437dd2e3b9a5dc222824cb82eeda86124f614e..a121982af0f5b1cd0fd1439e2abf13333702d426 100644 (file)
@@ -118,9 +118,14 @@ __printf(2, 3)
 void seq_printf(struct seq_file *m, const char *fmt, ...);
 void seq_putc(struct seq_file *m, char c);
 void seq_puts(struct seq_file *m, const char *s);
+void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
+                              unsigned long long num, unsigned int width);
 void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
                         unsigned long long num);
 void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num);
+void seq_put_hex_ll(struct seq_file *m, const char *delimiter,
+                   unsigned long long v, unsigned int width);
+
 void seq_escape(struct seq_file *m, const char *s, const char *esc);
 
 void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
@@ -235,4 +240,5 @@ extern struct hlist_node *seq_hlist_start_percpu(struct hlist_head __percpu *hea
 
 extern struct hlist_node *seq_hlist_next_percpu(void *v, struct hlist_head __percpu *head, int *cpu, loff_t *pos);
 
+void seq_file_init(void);
 #endif
index bd8e0864b05923a8b4232ca82ffe62b9a9ba4208..5b98bbdabc258f067078b6387847f83a396fcb17 100644 (file)
@@ -14,6 +14,7 @@ struct firmware;
 ssize_t qcom_mdt_get_size(const struct firmware *fw);
 int qcom_mdt_load(struct device *dev, const struct firmware *fw,
                  const char *fw_name, int pas_id, void *mem_region,
-                 phys_addr_t mem_phys, size_t mem_size);
+                 phys_addr_t mem_phys, size_t mem_size,
+                 phys_addr_t *reloc_base);
 
 #endif
index 7f8c9a127f5a0c964a63581dfc954abf6d56ff4f..55388ab45fd4d474984beb5e4049471f8d29f646 100644 (file)
@@ -115,27 +115,46 @@ enum tick_dep_bits {
 extern bool tick_nohz_enabled;
 extern bool tick_nohz_tick_stopped(void);
 extern bool tick_nohz_tick_stopped_cpu(int cpu);
+extern void tick_nohz_idle_stop_tick(void);
+extern void tick_nohz_idle_retain_tick(void);
+extern void tick_nohz_idle_restart_tick(void);
 extern void tick_nohz_idle_enter(void);
 extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
-extern ktime_t tick_nohz_get_sleep_length(void);
+extern bool tick_nohz_idle_got_tick(void);
+extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next);
 extern unsigned long tick_nohz_get_idle_calls(void);
 extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
+
+static inline void tick_nohz_idle_stop_tick_protected(void)
+{
+       local_irq_disable();
+       tick_nohz_idle_stop_tick();
+       local_irq_enable();
+}
+
 #else /* !CONFIG_NO_HZ_COMMON */
 #define tick_nohz_enabled (0)
 static inline int tick_nohz_tick_stopped(void) { return 0; }
 static inline int tick_nohz_tick_stopped_cpu(int cpu) { return 0; }
+static inline void tick_nohz_idle_stop_tick(void) { }
+static inline void tick_nohz_idle_retain_tick(void) { }
+static inline void tick_nohz_idle_restart_tick(void) { }
 static inline void tick_nohz_idle_enter(void) { }
 static inline void tick_nohz_idle_exit(void) { }
+static inline bool tick_nohz_idle_got_tick(void) { return false; }
 
-static inline ktime_t tick_nohz_get_sleep_length(void)
+static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
 {
-       return NSEC_PER_SEC / HZ;
+       *delta_next = TICK_NSEC;
+       return *delta_next;
 }
 static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
 static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
+
+static inline void tick_nohz_idle_stop_tick_protected(void) { }
 #endif /* !CONFIG_NO_HZ_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
index 82c219dfd3bbd6c79ad65163a1304faa9dc5d2a1..9737fbec7019bd02b9f6bf0ef9d34e1dc67fdaaa 100644 (file)
@@ -31,6 +31,7 @@ struct timespec64 get_monotonic_coarse64(void);
 extern void getrawmonotonic64(struct timespec64 *ts);
 extern void ktime_get_ts64(struct timespec64 *ts);
 extern time64_t ktime_get_seconds(void);
+extern time64_t __ktime_get_real_seconds(void);
 extern time64_t ktime_get_real_seconds(void);
 extern void ktime_get_active_ts64(struct timespec64 *ts);
 
index e0e98000b66542b2067da3bc6c37c8190111f804..2bde3eff564cdde138c8d22da4d4d8d125e2ed3d 100644 (file)
@@ -430,11 +430,13 @@ enum event_trigger_type {
 
 extern int filter_match_preds(struct event_filter *filter, void *rec);
 
-extern enum event_trigger_type event_triggers_call(struct trace_event_file *file,
-                                                  void *rec);
-extern void event_triggers_post_call(struct trace_event_file *file,
-                                    enum event_trigger_type tt,
-                                    void *rec);
+extern enum event_trigger_type
+event_triggers_call(struct trace_event_file *file, void *rec,
+                   struct ring_buffer_event *event);
+extern void
+event_triggers_post_call(struct trace_event_file *file,
+                        enum event_trigger_type tt,
+                        void *rec, struct ring_buffer_event *event);
 
 bool trace_event_ignore_this_pid(struct trace_event_file *trace_file);
 
@@ -454,7 +456,7 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
 
        if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) {
                if (eflags & EVENT_FILE_FL_TRIGGER_MODE)
-                       event_triggers_call(file, NULL);
+                       event_triggers_call(file, NULL, NULL);
                if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
                        return true;
                if (eflags & EVENT_FILE_FL_PID_FILTER)
index c8060c2ecd04b574bb2b6a795bb758a34f778f91..44429d9142ca3d08309435d7fe27e4ade7038103 100644 (file)
@@ -44,6 +44,8 @@ static inline void put_uts_ns(struct uts_namespace *ns)
 {
        kref_put(&ns->kref, free_uts_ns);
 }
+
+void uts_ns_init(void);
 #else
 static inline void get_uts_ns(struct uts_namespace *ns)
 {
@@ -61,6 +63,10 @@ static inline struct uts_namespace *copy_utsname(unsigned long flags,
 
        return old_ns;
 }
+
+static inline void uts_ns_init(void)
+{
+}
 #endif
 
 #ifdef CONFIG_PROC_SYSCTL
index a4c2317d8b9f770ac67d0b0bc03cf36f464573fa..f25cef84b41db718a0d977c471b9afbc6773378e 100644 (file)
@@ -20,6 +20,17 @@ extern int sysctl_vm_numa_stat_handler(struct ctl_table *table,
                int write, void __user *buffer, size_t *length, loff_t *ppos);
 #endif
 
+struct reclaim_stat {
+       unsigned nr_dirty;
+       unsigned nr_unqueued_dirty;
+       unsigned nr_congested;
+       unsigned nr_writeback;
+       unsigned nr_immediate;
+       unsigned nr_activate;
+       unsigned nr_ref_keep;
+       unsigned nr_unmap_fail;
+};
+
 #ifdef CONFIG_VM_EVENT_COUNTERS
 /*
  * Light weight per cpu counter implementation.
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
new file mode 100644 (file)
index 0000000..2dfc800
--- /dev/null
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef _LINUX_XARRAY_H
+#define _LINUX_XARRAY_H
+/*
+ * eXtensible Arrays
+ * Copyright (c) 2017 Microsoft Corporation
+ * Author: Matthew Wilcox <mawilcox@microsoft.com>
+ */
+
+#include <linux/spinlock.h>
+
+#define xa_trylock(xa)         spin_trylock(&(xa)->xa_lock)
+#define xa_lock(xa)            spin_lock(&(xa)->xa_lock)
+#define xa_unlock(xa)          spin_unlock(&(xa)->xa_lock)
+#define xa_lock_bh(xa)         spin_lock_bh(&(xa)->xa_lock)
+#define xa_unlock_bh(xa)       spin_unlock_bh(&(xa)->xa_lock)
+#define xa_lock_irq(xa)                spin_lock_irq(&(xa)->xa_lock)
+#define xa_unlock_irq(xa)      spin_unlock_irq(&(xa)->xa_lock)
+#define xa_lock_irqsave(xa, flags) \
+                               spin_lock_irqsave(&(xa)->xa_lock, flags)
+#define xa_unlock_irqrestore(xa, flags) \
+                               spin_unlock_irqrestore(&(xa)->xa_lock, flags)
+
+#endif /* _LINUX_XARRAY_H */
index 54b689247937576e7ad049e5967cec97965b202f..160bca96d5241b8c85b7041ff711c499ae1088c7 100644 (file)
@@ -320,6 +320,7 @@ void v4l_bound_align_image(unsigned int *width, unsigned int wmin,
  *     set of resolutions contained in an array of a driver specific struct.
  *
  * @array: a driver specific array of image sizes
+ * @array_size: the length of the driver specific array of image sizes
  * @width_field: the name of the width field in the driver specific struct
  * @height_field: the name of the height field in the driver specific struct
  * @width: desired width.
@@ -332,13 +333,13 @@ void v4l_bound_align_image(unsigned int *width, unsigned int wmin,
  *
  * Returns the best match or NULL if the length of the array is zero.
  */
-#define v4l2_find_nearest_size(array, width_field, height_field, \
+#define v4l2_find_nearest_size(array, array_size, width_field, height_field, \
                               width, height)                           \
        ({                                                              \
                BUILD_BUG_ON(sizeof((array)->width_field) != sizeof(u32) || \
                             sizeof((array)->height_field) != sizeof(u32)); \
                (typeof(&(*(array))))__v4l2_find_nearest_size(          \
-                       (array), ARRAY_SIZE(array), sizeof(*(array)),   \
+                       (array), array_size, sizeof(*(array)),          \
                        offsetof(typeof(*(array)), width_field),        \
                        offsetof(typeof(*(array)), height_field),       \
                        width, height);                                 \
index 27634e8d2585a01b042835d8fe05b80a2557a670..f60cf9cf3b9cff9381bef6e125a3989e33b906f4 100644 (file)
  */
 enum vfl_devnode_type {
        VFL_TYPE_GRABBER        = 0,
-       VFL_TYPE_VBI            = 1,
-       VFL_TYPE_RADIO          = 2,
-       VFL_TYPE_SUBDEV         = 3,
-       VFL_TYPE_SDR            = 4,
-       VFL_TYPE_TOUCH          = 5,
+       VFL_TYPE_VBI,
+       VFL_TYPE_RADIO,
+       VFL_TYPE_SUBDEV,
+       VFL_TYPE_SDR,
+       VFL_TYPE_TOUCH,
+       VFL_TYPE_MAX /* Shall be the last one */
 };
-#define VFL_TYPE_MAX VFL_TYPE_TOUCH
 
 /**
  * enum  vfl_direction - Identifies if a &struct video_device corresponds
index 95ccc1eef558459b088b4146f0b6b822fb1aec0c..b619a190ff1283a9bd43379c6dc54d4d8617dc4c 100644 (file)
@@ -895,7 +895,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
                                     u16 conn_timeout);
 struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
                                u8 dst_type, u8 sec_level, u16 conn_timeout,
-                               u8 role);
+                               u8 role, bdaddr_t *direct_rpa);
 struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
                                 u8 sec_level, u8 auth_type);
 struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
index e21d8cadd48096f56956903e108467b9bd25edab..2e4f71e16e95676c26948ae7a5a564c9a0652bf8 100644 (file)
@@ -231,14 +231,6 @@ struct devlink_dpipe_headers {
        unsigned int headers_count;
 };
 
-/**
- * struct devlink_resource_ops - resource ops
- * @occ_get: get the occupied size
- */
-struct devlink_resource_ops {
-       u64 (*occ_get)(struct devlink *devlink);
-};
-
 /**
  * struct devlink_resource_size_params - resource's size parameters
  * @size_min: minimum size which can be set
@@ -265,6 +257,8 @@ devlink_resource_size_params_init(struct devlink_resource_size_params *size_para
        size_params->unit = unit;
 }
 
+typedef u64 devlink_resource_occ_get_t(void *priv);
+
 /**
  * struct devlink_resource - devlink resource
  * @name: name of the resource
@@ -277,7 +271,6 @@ devlink_resource_size_params_init(struct devlink_resource_size_params *size_para
  * @size_params: size parameters
  * @list: parent list
  * @resource_list: list of child resources
- * @resource_ops: resource ops
  */
 struct devlink_resource {
        const char *name;
@@ -289,7 +282,8 @@ struct devlink_resource {
        struct devlink_resource_size_params size_params;
        struct list_head list;
        struct list_head resource_list;
-       const struct devlink_resource_ops *resource_ops;
+       devlink_resource_occ_get_t *occ_get;
+       void *occ_get_priv;
 };
 
 #define DEVLINK_RESOURCE_ID_PARENT_TOP 0
@@ -409,8 +403,7 @@ int devlink_resource_register(struct devlink *devlink,
                              u64 resource_size,
                              u64 resource_id,
                              u64 parent_resource_id,
-                             const struct devlink_resource_size_params *size_params,
-                             const struct devlink_resource_ops *resource_ops);
+                             const struct devlink_resource_size_params *size_params);
 void devlink_resources_unregister(struct devlink *devlink,
                                  struct devlink_resource *resource);
 int devlink_resource_size_get(struct devlink *devlink,
@@ -419,6 +412,12 @@ int devlink_resource_size_get(struct devlink *devlink,
 int devlink_dpipe_table_resource_set(struct devlink *devlink,
                                     const char *table_name, u64 resource_id,
                                     u64 resource_units);
+void devlink_resource_occ_get_register(struct devlink *devlink,
+                                      u64 resource_id,
+                                      devlink_resource_occ_get_t *occ_get,
+                                      void *occ_get_priv);
+void devlink_resource_occ_get_unregister(struct devlink *devlink,
+                                        u64 resource_id);
 
 #else
 
@@ -562,8 +561,7 @@ devlink_resource_register(struct devlink *devlink,
                          u64 resource_size,
                          u64 resource_id,
                          u64 parent_resource_id,
-                         const struct devlink_resource_size_params *size_params,
-                         const struct devlink_resource_ops *resource_ops)
+                         const struct devlink_resource_size_params *size_params)
 {
        return 0;
 }
@@ -589,6 +587,20 @@ devlink_dpipe_table_resource_set(struct devlink *devlink,
        return -EOPNOTSUPP;
 }
 
+static inline void
+devlink_resource_occ_get_register(struct devlink *devlink,
+                                 u64 resource_id,
+                                 devlink_resource_occ_get_t *occ_get,
+                                 void *occ_get_priv)
+{
+}
+
+static inline void
+devlink_resource_occ_get_unregister(struct devlink *devlink,
+                                   u64 resource_id)
+{
+}
+
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
index 899495589a7ea2bf693cdda42f83cec160e861b5..c7be1ca8e562f82ee9b24122cc21f45172458e23 100644 (file)
@@ -43,6 +43,7 @@ struct inet_timewait_sock {
 #define tw_family              __tw_common.skc_family
 #define tw_state               __tw_common.skc_state
 #define tw_reuse               __tw_common.skc_reuse
+#define tw_reuseport           __tw_common.skc_reuseport
 #define tw_ipv6only            __tw_common.skc_ipv6only
 #define tw_bound_dev_if                __tw_common.skc_bound_dev_if
 #define tw_node                        __tw_common.skc_nulls_node
index 36bb794f5cd65cddc60139e7f84024f9f7a1a4cb..902ff382a6dcc6e3f0c1a9a244061e5a84264915 100644 (file)
@@ -7,7 +7,7 @@
 
 static inline int rtnh_ok(const struct rtnexthop *rtnh, int remaining)
 {
-       return remaining >= sizeof(*rtnh) &&
+       return remaining >= (int)sizeof(*rtnh) &&
               rtnh->rtnh_len >= sizeof(*rtnh) &&
               rtnh->rtnh_len <= remaining;
 }
index 8716d5942b6561dc1cfd037ccfb7d732d26eba07..8fcf8908a694fb63a7316d1b0f760511d189e880 100644 (file)
@@ -127,6 +127,7 @@ typedef __u32 int32;
  */
 struct cstate {
        byte_t  cs_this;        /* connection id number (xmit) */
+       bool    initialized;    /* true if initialized */
        struct cstate *next;    /* next in ring (xmit) */
        struct iphdr cs_ip;     /* ip/tcp hdr from most recent packet */
        struct tcphdr cs_tcp;
diff --git a/include/trace/events/initcall.h b/include/trace/events/initcall.h
new file mode 100644 (file)
index 0000000..8d6cf10
--- /dev/null
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM initcall
+
+#if !defined(_TRACE_INITCALL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_INITCALL_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(initcall_level,
+
+       TP_PROTO(const char *level),
+
+       TP_ARGS(level),
+
+       TP_STRUCT__entry(
+               __string(level, level)
+       ),
+
+       TP_fast_assign(
+               __assign_str(level, level);
+       ),
+
+       TP_printk("level=%s", __get_str(level))
+);
+
+TRACE_EVENT(initcall_start,
+
+       TP_PROTO(initcall_t func),
+
+       TP_ARGS(func),
+
+       TP_STRUCT__entry(
+               __field(initcall_t, func)
+       ),
+
+       TP_fast_assign(
+               __entry->func = func;
+       ),
+
+       TP_printk("func=%pS", __entry->func)
+);
+
+TRACE_EVENT(initcall_finish,
+
+       TP_PROTO(initcall_t func, int ret),
+
+       TP_ARGS(func, ret),
+
+       TP_STRUCT__entry(
+               __field(initcall_t,     func)
+               __field(int,            ret)
+       ),
+
+       TP_fast_assign(
+               __entry->func = func;
+               __entry->ret = ret;
+       ),
+
+       TP_printk("func=%pS ret=%d", __entry->func, __entry->ret)
+);
+
+#endif /* if !defined(_TRACE_INITCALL_H) || defined(TRACE_HEADER_MULTI_READ) */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/rtc.h b/include/trace/events/rtc.h
new file mode 100644 (file)
index 0000000..621333f
--- /dev/null
@@ -0,0 +1,206 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rtc
+
+#if !defined(_TRACE_RTC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RTC_H
+
+#include <linux/rtc.h>
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(rtc_time_alarm_class,
+
+       TP_PROTO(time64_t secs, int err),
+
+       TP_ARGS(secs, err),
+
+       TP_STRUCT__entry(
+               __field(time64_t, secs)
+               __field(int, err)
+       ),
+
+       TP_fast_assign(
+               __entry->secs = secs;
+               __entry->err = err;
+       ),
+
+       TP_printk("UTC (%lld) (%d)",
+                 __entry->secs, __entry->err
+       )
+);
+
+DEFINE_EVENT(rtc_time_alarm_class, rtc_set_time,
+
+       TP_PROTO(time64_t secs, int err),
+
+       TP_ARGS(secs, err)
+);
+
+DEFINE_EVENT(rtc_time_alarm_class, rtc_read_time,
+
+       TP_PROTO(time64_t secs, int err),
+
+       TP_ARGS(secs, err)
+);
+
+DEFINE_EVENT(rtc_time_alarm_class, rtc_set_alarm,
+
+       TP_PROTO(time64_t secs, int err),
+
+       TP_ARGS(secs, err)
+);
+
+DEFINE_EVENT(rtc_time_alarm_class, rtc_read_alarm,
+
+       TP_PROTO(time64_t secs, int err),
+
+       TP_ARGS(secs, err)
+);
+
+TRACE_EVENT(rtc_irq_set_freq,
+
+       TP_PROTO(int freq, int err),
+
+       TP_ARGS(freq, err),
+
+       TP_STRUCT__entry(
+               __field(int, freq)
+               __field(int, err)
+       ),
+
+       TP_fast_assign(
+               __entry->freq = freq;
+               __entry->err = err;
+       ),
+
+       TP_printk("set RTC periodic IRQ frequency:%u (%d)",
+                 __entry->freq, __entry->err
+       )
+);
+
+TRACE_EVENT(rtc_irq_set_state,
+
+       TP_PROTO(int enabled, int err),
+
+       TP_ARGS(enabled, err),
+
+       TP_STRUCT__entry(
+               __field(int, enabled)
+               __field(int, err)
+       ),
+
+       TP_fast_assign(
+               __entry->enabled = enabled;
+               __entry->err = err;
+       ),
+
+       TP_printk("%s RTC 2^N Hz periodic IRQs (%d)",
+                 __entry->enabled ? "enable" : "disable",
+                 __entry->err
+       )
+);
+
+TRACE_EVENT(rtc_alarm_irq_enable,
+
+       TP_PROTO(unsigned int enabled, int err),
+
+       TP_ARGS(enabled, err),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, enabled)
+               __field(int, err)
+       ),
+
+       TP_fast_assign(
+               __entry->enabled = enabled;
+               __entry->err = err;
+       ),
+
+       TP_printk("%s RTC alarm IRQ (%d)",
+                 __entry->enabled ? "enable" : "disable",
+                 __entry->err
+       )
+);
+
+DECLARE_EVENT_CLASS(rtc_offset_class,
+
+       TP_PROTO(long offset, int err),
+
+       TP_ARGS(offset, err),
+
+       TP_STRUCT__entry(
+               __field(long, offset)
+               __field(int, err)
+       ),
+
+       TP_fast_assign(
+               __entry->offset = offset;
+               __entry->err = err;
+       ),
+
+       TP_printk("RTC offset: %ld (%d)",
+                 __entry->offset, __entry->err
+       )
+);
+
+DEFINE_EVENT(rtc_offset_class, rtc_set_offset,
+
+       TP_PROTO(long offset, int err),
+
+       TP_ARGS(offset, err)
+);
+
+DEFINE_EVENT(rtc_offset_class, rtc_read_offset,
+
+       TP_PROTO(long offset, int err),
+
+       TP_ARGS(offset, err)
+);
+
+DECLARE_EVENT_CLASS(rtc_timer_class,
+
+       TP_PROTO(struct rtc_timer *timer),
+
+       TP_ARGS(timer),
+
+       TP_STRUCT__entry(
+               __field(struct rtc_timer *, timer)
+               __field(ktime_t, expires)
+               __field(ktime_t, period)
+       ),
+
+       TP_fast_assign(
+               __entry->timer = timer;
+               __entry->expires = timer->node.expires;
+               __entry->period = timer->period;
+       ),
+
+       TP_printk("RTC timer:(%p) expires:%lld period:%lld",
+                 __entry->timer, __entry->expires, __entry->period
+       )
+);
+
+DEFINE_EVENT(rtc_timer_class, rtc_timer_enqueue,
+
+       TP_PROTO(struct rtc_timer *timer),
+
+       TP_ARGS(timer)
+);
+
+DEFINE_EVENT(rtc_timer_class, rtc_timer_dequeue,
+
+       TP_PROTO(struct rtc_timer *timer),
+
+       TP_ARGS(timer)
+);
+
+DEFINE_EVENT(rtc_timer_class, rtc_timer_fired,
+
+       TP_PROTO(struct rtc_timer *timer),
+
+       TP_ARGS(timer)
+);
+
+#endif /* _TRACE_RTC_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
index 6570c5b45ba158420058b21d95b4e4f88a136d01..a1cb91342231fed2c30234f3ceb1e257b3cea39f 100644 (file)
@@ -346,15 +346,9 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
 
        TP_PROTO(int nid,
                unsigned long nr_scanned, unsigned long nr_reclaimed,
-               unsigned long nr_dirty, unsigned long nr_writeback,
-               unsigned long nr_congested, unsigned long nr_immediate,
-               unsigned long nr_activate, unsigned long nr_ref_keep,
-               unsigned long nr_unmap_fail,
-               int priority, int file),
+               struct reclaim_stat *stat, int priority, int file),
 
-       TP_ARGS(nid, nr_scanned, nr_reclaimed, nr_dirty, nr_writeback,
-               nr_congested, nr_immediate, nr_activate, nr_ref_keep,
-               nr_unmap_fail, priority, file),
+       TP_ARGS(nid, nr_scanned, nr_reclaimed, stat, priority, file),
 
        TP_STRUCT__entry(
                __field(int, nid)
@@ -375,13 +369,13 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
                __entry->nid = nid;
                __entry->nr_scanned = nr_scanned;
                __entry->nr_reclaimed = nr_reclaimed;
-               __entry->nr_dirty = nr_dirty;
-               __entry->nr_writeback = nr_writeback;
-               __entry->nr_congested = nr_congested;
-               __entry->nr_immediate = nr_immediate;
-               __entry->nr_activate = nr_activate;
-               __entry->nr_ref_keep = nr_ref_keep;
-               __entry->nr_unmap_fail = nr_unmap_fail;
+               __entry->nr_dirty = stat->nr_dirty;
+               __entry->nr_writeback = stat->nr_writeback;
+               __entry->nr_congested = stat->nr_congested;
+               __entry->nr_immediate = stat->nr_immediate;
+               __entry->nr_activate = stat->nr_activate;
+               __entry->nr_ref_keep = stat->nr_ref_keep;
+               __entry->nr_unmap_fail = stat->nr_unmap_fail;
                __entry->priority = priority;
                __entry->reclaim_flags = trace_shrink_flags(file);
        ),
index f8b134f5608f3cbeb0d5afcbbf7963599f4f9e4b..e7ee32861d51d4b2e47b9182a48c05fe837b8d21 100644 (file)
@@ -27,6 +27,9 @@
 # define MAP_UNINITIALIZED 0x0         /* Don't support this flag */
 #endif
 
+/* 0x0100 - 0x80000 flags are defined in asm-generic/mman.h */
+#define MAP_FIXED_NOREPLACE    0x100000        /* MAP_FIXED which doesn't unmap underlying mapping */
+
 /*
  * Flags for mlock
  */
index 92537757590aa94ec5d76c78d1fbf7c3fdb19de1..5ed721ad5b1985818ff2c783793ae7bad0d7b7f1 100644 (file)
@@ -1,8 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /* const.h: Macros for dealing with constants.  */
 
-#ifndef _LINUX_CONST_H
-#define _LINUX_CONST_H
+#ifndef _UAPI_LINUX_CONST_H
+#define _UAPI_LINUX_CONST_H
 
 /* Some constant macros are used in both assembler and
  * C code.  Therefore we cannot annotate them always with
 #define _AT(T,X)       ((T)(X))
 #endif
 
-#define _BITUL(x)      (_AC(1,UL) << (x))
-#define _BITULL(x)     (_AC(1,ULL) << (x))
+#define _UL(x)         (_AC(x, UL))
+#define _ULL(x)                (_AC(x, ULL))
 
-#endif /* !(_LINUX_CONST_H) */
+#define _BITUL(x)      (_UL(1) << (x))
+#define _BITULL(x)     (_ULL(1) << (x))
+
+#endif /* _UAPI_LINUX_CONST_H */
index 5d5ab81dc9be8ec46ad62228807e8e99153b3bf8..e4a0d9a9a9e80c8ade302d4a93c373e3a3f0423c 100644 (file)
@@ -7,6 +7,7 @@
 /* ipcs ctl commands */
 #define MSG_STAT 11
 #define MSG_INFO 12
+#define MSG_STAT_ANY 13
 
 /* msgrcv options */
 #define MSG_NOERROR     010000  /* no error if message is too big */
index 9c3e745b065628d893351139988b0af7024dda77..39a1876f039e1d46552278c09993ca61aebd7b63 100644 (file)
@@ -19,6 +19,7 @@
 /* ipcs ctl cmds */
 #define SEM_STAT 18
 #define SEM_INFO 19
+#define SEM_STAT_ANY 20
 
 /* Obsolete, used only for backwards compatibility and libc5 compiles */
 struct semid_ds {
index 4de12a39b07506e04fff9d658144b2feee136d50..dde1344f047cf04c541a21f4d983c9eb3c606ef5 100644 (file)
@@ -83,8 +83,9 @@ struct shmid_ds {
 #define SHM_UNLOCK     12
 
 /* ipcs ctl commands */
-#define SHM_STAT       13
-#define SHM_INFO       14
+#define SHM_STAT       13
+#define SHM_INFO       14
+#define SHM_STAT_ANY    15
 
 /* Obsolete, used only for backwards compatibility */
 struct shminfo {
index 4e8b8304b7939622a7f8590f052deee1f389d2b6..40297a3181ed8244fbb4492b34c0feed55612739 100644 (file)
@@ -53,7 +53,9 @@ struct virtio_balloon_config {
 #define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */
 #define VIRTIO_BALLOON_S_AVAIL    6   /* Available memory as in /proc */
 #define VIRTIO_BALLOON_S_CACHES   7   /* Disk caches */
-#define VIRTIO_BALLOON_S_NR       8
+#define VIRTIO_BALLOON_S_HTLB_PGALLOC  8  /* Hugetlb page allocations */
+#define VIRTIO_BALLOON_S_HTLB_PGFAIL   9  /* Hugetlb page allocation failures */
+#define VIRTIO_BALLOON_S_NR       10
 
 /*
  * Memory statistics structure.
index 956455fc9f9a9bfa15ef9a7e4b516938361b41e2..bb29e5954000693c441b8d800ca531c6e05b3b4b 100644 (file)
@@ -19,7 +19,6 @@ struct display_timings;
 int of_get_display_timing(const struct device_node *np, const char *name,
                struct display_timing *dt);
 struct display_timings *of_get_display_timings(const struct device_node *np);
-int of_display_timings_exist(const struct device_node *np);
 #else
 static inline int of_get_display_timing(const struct device_node *np,
                const char *name, struct display_timing *dt)
@@ -31,10 +30,6 @@ of_get_display_timings(const struct device_node *np)
 {
        return NULL;
 }
-static inline int of_display_timings_exist(const struct device_node *np)
-{
-       return -ENOSYS;
-}
 #endif
 
 #endif
index 9b0eb574f0d155f0ac87893e538c21918b438eaf..6d1384abfbdf96edf21665022b9ef1eb9a5fe7d1 100644 (file)
@@ -42,6 +42,9 @@
 /* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
 #define XENFEAT_mmu_pt_update_preserve_ad  5
 
+/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */
+#define XENFEAT_highmem_assist             6
+
 /*
  * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel
  * available pte bits.
 /* operation as Dom0 is supported */
 #define XENFEAT_dom0                      11
 
+/* Xen also maps grant references at pfn = mfn.
+ * This feature flag is deprecated and should not be used.
+#define XENFEAT_grant_map_identity        12
+ */
+
+/* Guest can use XENMEMF_vnode to specify virtual node for memory op. */
+#define XENFEAT_memory_op_vnode_supported 13
+
+/* arm: Hypervisor supports ARM SMC calling convention. */
+#define XENFEAT_ARM_SMCCC_supported       14
+
+/*
+ * x86/PVH: If set, ACPI RSDP can be placed at any address. Otherwise RSDP
+ * must be located in lower 1MB, as required by ACPI Specification for IA-PC
+ * systems.
+ * This feature flag is only consulted if XEN_ELFNOTE_GUEST_OS contains
+ * the "linux" string.
+ */
+#define XENFEAT_linux_rsdp_unrestricted   15
+
 #define XENFEAT_NR_SUBMAPS 1
 
 #endif /* __XEN_PUBLIC_FEATURES_H__ */
index 12c159824c7bd994132d289b49fbf651f6cca750..035a5f0ab26b9882d251784f2abd348d7eef2b9a 100644 (file)
@@ -255,7 +255,7 @@ int __init rd_load_image(char *from)
                nblocks, ((nblocks-1)/devblocks)+1, nblocks>devblocks ? "s" : "");
        for (i = 0, disk = 1; i < nblocks; i++) {
                if (i && (i % devblocks == 0)) {
-                       printk("done disk #%d.\n", disk++);
+                       pr_cont("done disk #%d.\n", disk++);
                        rotate = 0;
                        if (ksys_close(in_fd)) {
                                printk("Error closing the disk.\n");
@@ -278,7 +278,7 @@ int __init rd_load_image(char *from)
                }
 #endif
        }
-       printk("done.\n");
+       pr_cont("done.\n");
 
 successful_load:
        res = 1;
index e4a3160991ea4cfeac6b08f8dcba2510c997d369..b795aa341a3a30528f878db01f79907a2ac3e5b1 100644 (file)
@@ -51,6 +51,7 @@
 #include <linux/taskstats_kern.h>
 #include <linux/delayacct.h>
 #include <linux/unistd.h>
+#include <linux/utsname.h>
 #include <linux/rmap.h>
 #include <linux/mempolicy.h>
 #include <linux/key.h>
@@ -97,6 +98,9 @@
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/initcall.h>
+
 static int kernel_init(void *);
 
 extern void init_IRQ(void);
@@ -491,6 +495,17 @@ void __init __weak thread_stack_cache_init(void)
 
 void __init __weak mem_encrypt_init(void) { }
 
+bool initcall_debug;
+core_param(initcall_debug, initcall_debug, bool, 0644);
+
+#ifdef TRACEPOINTS_ENABLED
+static void __init initcall_debug_enable(void);
+#else
+static inline void initcall_debug_enable(void)
+{
+}
+#endif
+
 /*
  * Set up kernel memory allocators
  */
@@ -612,6 +627,9 @@ asmlinkage __visible void __init start_kernel(void)
        /* Trace events are available after this */
        trace_init();
 
+       if (initcall_debug)
+               initcall_debug_enable();
+
        context_tracking_init();
        /* init some links before init_ISA_irqs() */
        early_irq_init();
@@ -689,6 +707,7 @@ asmlinkage __visible void __init start_kernel(void)
        cred_init();
        fork_init();
        proc_caches_init();
+       uts_ns_init();
        buffer_init();
        key_init();
        security_init();
@@ -696,6 +715,7 @@ asmlinkage __visible void __init start_kernel(void)
        vfs_caches_init();
        pagecache_init();
        signals_init();
+       seq_file_init();
        proc_root_init();
        nsfs_init();
        cpuset_init();
@@ -728,9 +748,6 @@ static void __init do_ctors(void)
 #endif
 }
 
-bool initcall_debug;
-core_param(initcall_debug, initcall_debug, bool, 0644);
-
 #ifdef CONFIG_KALLSYMS
 struct blacklist_entry {
        struct list_head next;
@@ -800,37 +817,71 @@ static bool __init_or_module initcall_blacklisted(initcall_t fn)
 #endif
 __setup("initcall_blacklist=", initcall_blacklist);
 
-static int __init_or_module do_one_initcall_debug(initcall_t fn)
+static __init_or_module void
+trace_initcall_start_cb(void *data, initcall_t fn)
 {
-       ktime_t calltime, delta, rettime;
-       unsigned long long duration;
-       int ret;
+       ktime_t *calltime = (ktime_t *)data;
 
        printk(KERN_DEBUG "calling  %pF @ %i\n", fn, task_pid_nr(current));
-       calltime = ktime_get();
-       ret = fn();
+       *calltime = ktime_get();
+}
+
+static __init_or_module void
+trace_initcall_finish_cb(void *data, initcall_t fn, int ret)
+{
+       ktime_t *calltime = (ktime_t *)data;
+       ktime_t delta, rettime;
+       unsigned long long duration;
+
        rettime = ktime_get();
-       delta = ktime_sub(rettime, calltime);
+       delta = ktime_sub(rettime, *calltime);
        duration = (unsigned long long) ktime_to_ns(delta) >> 10;
        printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n",
                 fn, ret, duration);
+}
 
-       return ret;
+static ktime_t initcall_calltime;
+
+#ifdef TRACEPOINTS_ENABLED
+static void __init initcall_debug_enable(void)
+{
+       int ret;
+
+       ret = register_trace_initcall_start(trace_initcall_start_cb,
+                                           &initcall_calltime);
+       ret |= register_trace_initcall_finish(trace_initcall_finish_cb,
+                                             &initcall_calltime);
+       WARN(ret, "Failed to register initcall tracepoints\n");
 }
+# define do_trace_initcall_start       trace_initcall_start
+# define do_trace_initcall_finish      trace_initcall_finish
+#else
+static inline void do_trace_initcall_start(initcall_t fn)
+{
+       if (!initcall_debug)
+               return;
+       trace_initcall_start_cb(&initcall_calltime, fn);
+}
+static inline void do_trace_initcall_finish(initcall_t fn, int ret)
+{
+       if (!initcall_debug)
+               return;
+       trace_initcall_finish_cb(&initcall_calltime, fn, ret);
+}
+#endif /* !TRACEPOINTS_ENABLED */
 
 int __init_or_module do_one_initcall(initcall_t fn)
 {
        int count = preempt_count();
-       int ret;
        char msgbuf[64];
+       int ret;
 
        if (initcall_blacklisted(fn))
                return -EPERM;
 
-       if (initcall_debug)
-               ret = do_one_initcall_debug(fn);
-       else
-               ret = fn();
+       do_trace_initcall_start(fn);
+       ret = fn();
+       do_trace_initcall_finish(fn, ret);
 
        msgbuf[0] = 0;
 
@@ -874,7 +925,7 @@ static initcall_t *initcall_levels[] __initdata = {
 
 /* Keep these in sync with initcalls in include/linux/init.h */
 static char *initcall_level_names[] __initdata = {
-       "early",
+       "pure",
        "core",
        "postcore",
        "arch",
@@ -895,6 +946,7 @@ static void __init do_initcall_level(int level)
                   level, level,
                   NULL, &repair_env_string);
 
+       trace_initcall_level(initcall_level_names[level]);
        for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
                do_one_initcall(*fn);
 }
@@ -929,6 +981,7 @@ static void __init do_pre_smp_initcalls(void)
 {
        initcall_t *fn;
 
+       trace_initcall_level("early");
        for (fn = __initcall_start; fn < __initcall0_start; fn++)
                do_one_initcall(*fn);
 }
index 114a211896131c65b389f851b6bf33a7bef99ba1..56fd1c73eedccc93ea3f26f0f74e9dbe2c5b7bf8 100644 (file)
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -497,14 +497,14 @@ static int msgctl_stat(struct ipc_namespace *ns, int msqid,
        memset(p, 0, sizeof(*p));
 
        rcu_read_lock();
-       if (cmd == MSG_STAT) {
+       if (cmd == MSG_STAT || cmd == MSG_STAT_ANY) {
                msq = msq_obtain_object(ns, msqid);
                if (IS_ERR(msq)) {
                        err = PTR_ERR(msq);
                        goto out_unlock;
                }
                id = msq->q_perm.id;
-       } else {
+       } else { /* IPC_STAT */
                msq = msq_obtain_object_check(ns, msqid);
                if (IS_ERR(msq)) {
                        err = PTR_ERR(msq);
@@ -512,9 +512,14 @@ static int msgctl_stat(struct ipc_namespace *ns, int msqid,
                }
        }
 
-       err = -EACCES;
-       if (ipcperms(ns, &msq->q_perm, S_IRUGO))
-               goto out_unlock;
+       /* see comment for SHM_STAT_ANY */
+       if (cmd == MSG_STAT_ANY)
+               audit_ipc_obj(&msq->q_perm);
+       else {
+               err = -EACCES;
+               if (ipcperms(ns, &msq->q_perm, S_IRUGO))
+                       goto out_unlock;
+       }
 
        err = security_msg_queue_msgctl(&msq->q_perm, cmd);
        if (err)
@@ -572,6 +577,7 @@ long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
                return err;
        }
        case MSG_STAT:  /* msqid is an index rather than a msg queue id */
+       case MSG_STAT_ANY:
        case IPC_STAT:
                err = msgctl_stat(ns, msqid, cmd, &msqid64);
                if (err < 0)
@@ -690,6 +696,7 @@ long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr)
        }
        case IPC_STAT:
        case MSG_STAT:
+       case MSG_STAT_ANY:
                err = msgctl_stat(ns, msqid, cmd, &msqid64);
                if (err < 0)
                        return err;
index 2994da8ccc7f6d77b72451e96d6c7f4dcc0764c0..06be75d9217ace222500b2ebfa276bdee524f6e3 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1220,14 +1220,14 @@ static int semctl_stat(struct ipc_namespace *ns, int semid,
        memset(semid64, 0, sizeof(*semid64));
 
        rcu_read_lock();
-       if (cmd == SEM_STAT) {
+       if (cmd == SEM_STAT || cmd == SEM_STAT_ANY) {
                sma = sem_obtain_object(ns, semid);
                if (IS_ERR(sma)) {
                        err = PTR_ERR(sma);
                        goto out_unlock;
                }
                id = sma->sem_perm.id;
-       } else {
+       } else { /* IPC_STAT */
                sma = sem_obtain_object_check(ns, semid);
                if (IS_ERR(sma)) {
                        err = PTR_ERR(sma);
@@ -1235,9 +1235,14 @@ static int semctl_stat(struct ipc_namespace *ns, int semid,
                }
        }
 
-       err = -EACCES;
-       if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
-               goto out_unlock;
+       /* see comment for SHM_STAT_ANY */
+       if (cmd == SEM_STAT_ANY)
+               audit_ipc_obj(&sma->sem_perm);
+       else {
+               err = -EACCES;
+               if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
+                       goto out_unlock;
+       }
 
        err = security_sem_semctl(&sma->sem_perm, cmd);
        if (err)
@@ -1626,6 +1631,7 @@ long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg)
                return semctl_info(ns, semid, cmd, p);
        case IPC_STAT:
        case SEM_STAT:
+       case SEM_STAT_ANY:
                err = semctl_stat(ns, semid, cmd, &semid64);
                if (err < 0)
                        return err;
@@ -1732,6 +1738,7 @@ long compat_ksys_semctl(int semid, int semnum, int cmd, int arg)
                return semctl_info(ns, semid, cmd, p);
        case IPC_STAT:
        case SEM_STAT:
+       case SEM_STAT_ANY:
                err = semctl_stat(ns, semid, cmd, &semid64);
                if (err < 0)
                        return err;
index acefe44fefefa187c838c3830d944b5d3a4c047d..5639345dbec9d4296d29702c352ced36cf781e35 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -415,7 +415,7 @@ static int shm_split(struct vm_area_struct *vma, unsigned long addr)
        struct file *file = vma->vm_file;
        struct shm_file_data *sfd = shm_file_data(file);
 
-       if (sfd->vm_ops && sfd->vm_ops->split)
+       if (sfd->vm_ops->split)
                return sfd->vm_ops->split(vma, addr);
 
        return 0;
@@ -947,14 +947,14 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid,
        memset(tbuf, 0, sizeof(*tbuf));
 
        rcu_read_lock();
-       if (cmd == SHM_STAT) {
+       if (cmd == SHM_STAT || cmd == SHM_STAT_ANY) {
                shp = shm_obtain_object(ns, shmid);
                if (IS_ERR(shp)) {
                        err = PTR_ERR(shp);
                        goto out_unlock;
                }
                id = shp->shm_perm.id;
-       } else {
+       } else { /* IPC_STAT */
                shp = shm_obtain_object_check(ns, shmid);
                if (IS_ERR(shp)) {
                        err = PTR_ERR(shp);
@@ -962,9 +962,20 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid,
                }
        }
 
-       err = -EACCES;
-       if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
-               goto out_unlock;
+       /*
+        * Semantically SHM_STAT_ANY ought to be identical to
+        * that functionality provided by the /proc/sysvipc/
+        * interface. As such, only audit these calls and
+        * do not do traditional S_IRUGO permission checks on
+        * the ipc object.
+        */
+       if (cmd == SHM_STAT_ANY)
+               audit_ipc_obj(&shp->shm_perm);
+       else {
+               err = -EACCES;
+               if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
+                       goto out_unlock;
+       }
 
        err = security_shm_shmctl(&shp->shm_perm, cmd);
        if (err)
@@ -1104,6 +1115,7 @@ long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
                return err;
        }
        case SHM_STAT:
+       case SHM_STAT_ANY:
        case IPC_STAT: {
                err = shmctl_stat(ns, shmid, cmd, &sem64);
                if (err < 0)
@@ -1282,6 +1294,7 @@ long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr)
                return err;
        }
        case IPC_STAT:
+       case SHM_STAT_ANY:
        case SHM_STAT:
                err = shmctl_stat(ns, shmid, cmd, &sem64);
                if (err < 0)
index 3783b7991cc7e2b10745d36d933f66f5fd68bcda..4e81182fa0ac48cad2ed3f5afdd8141e161a920b 100644 (file)
@@ -89,6 +89,7 @@ static int __init ipc_init(void)
 {
        int err_sem, err_msg;
 
+       proc_mkdir("sysvipc", NULL);
        err_sem = sem_init();
        WARN(err_sem, "ipc: sysv sem_init failed: %d\n", err_sem);
        err_msg = msg_init();
index d2bda5aa25d7d9d0dc11cb85fc22c8b7557a5809..8dd9210d7db7851da0eb79b950803fedff9d89f3 100644 (file)
@@ -182,8 +182,10 @@ static void bpf_tcp_release(struct sock *sk)
                psock->cork = NULL;
        }
 
-       sk->sk_prot = psock->sk_proto;
-       psock->sk_proto = NULL;
+       if (psock->sk_proto) {
+               sk->sk_prot = psock->sk_proto;
+               psock->sk_proto = NULL;
+       }
 out:
        rcu_read_unlock();
 }
@@ -211,6 +213,12 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
        close_fun = psock->save_close;
 
        write_lock_bh(&sk->sk_callback_lock);
+       if (psock->cork) {
+               free_start_sg(psock->sock, psock->cork);
+               kfree(psock->cork);
+               psock->cork = NULL;
+       }
+
        list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
                list_del(&md->list);
                free_start_sg(psock->sock, md);
index 0244973ee54496cea3935e98e1325aac8d3d6cb7..4ca46df19c9a0f6cc7e83d70aec4ac91526667dc 100644 (file)
@@ -1226,18 +1226,6 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
        }
 }
 
-static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
-                                            enum bpf_attach_type attach_type)
-{
-       switch (prog->type) {
-       case BPF_PROG_TYPE_CGROUP_SOCK:
-       case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
-               return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
-       default:
-               return 0;
-       }
-}
-
 /* last field in 'union bpf_attr' used by this command */
 #define        BPF_PROG_LOAD_LAST_FIELD expected_attach_type
 
@@ -1465,6 +1453,18 @@ out_free_tp:
 
 #ifdef CONFIG_CGROUP_BPF
 
+static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
+                                            enum bpf_attach_type attach_type)
+{
+       switch (prog->type) {
+       case BPF_PROG_TYPE_CGROUP_SOCK:
+       case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+               return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
+       default:
+               return 0;
+       }
+}
+
 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags
 
 static int sockmap_get_from_fd(const union bpf_attr *attr,
index 90ff129c88a27c50e33be234be695650e7210494..62c301ad07735c8b5713aebe8980f8c61d44a83b 100644 (file)
@@ -242,11 +242,11 @@ static void kdb_printbp(kdb_bp_t *bp, int i)
        kdb_symbol_print(bp->bp_addr, NULL, KDB_SP_DEFAULT);
 
        if (bp->bp_enabled)
-               kdb_printf("\n    is enabled");
+               kdb_printf("\n    is enabled ");
        else
                kdb_printf("\n    is disabled");
 
-       kdb_printf("\taddr at %016lx, hardtype=%d installed=%d\n",
+       kdb_printf("  addr at %016lx, hardtype=%d installed=%d\n",
                   bp->bp_addr, bp->bp_type, bp->bp_installed);
 
        kdb_printf("\n");
index dbb0781a053364824f5361d07cfc72cf9fe0a2e2..e405677ee08d6ae6b71c36afd2270d2ed806810d 100644 (file)
@@ -1150,6 +1150,16 @@ void kdb_set_current_task(struct task_struct *p)
        kdb_current_regs = NULL;
 }
 
+static void drop_newline(char *buf)
+{
+       size_t len = strlen(buf);
+
+       if (len == 0)
+               return;
+       if (*(buf + len - 1) == '\n')
+               *(buf + len - 1) = '\0';
+}
+
 /*
  * kdb_local - The main code for kdb.  This routine is invoked on a
  *     specific processor, it is not global.  The main kdb() routine
@@ -1327,6 +1337,7 @@ do_full_getstr:
                cmdptr = cmd_head;
                diag = kdb_parse(cmdbuf);
                if (diag == KDB_NOTFOUND) {
+                       drop_newline(cmdbuf);
                        kdb_printf("Unknown kdb command: '%s'\n", cmdbuf);
                        diag = 0;
                }
@@ -1566,6 +1577,7 @@ static int kdb_md(int argc, const char **argv)
        int symbolic = 0;
        int valid = 0;
        int phys = 0;
+       int raw = 0;
 
        kdbgetintenv("MDCOUNT", &mdcount);
        kdbgetintenv("RADIX", &radix);
@@ -1575,9 +1587,10 @@ static int kdb_md(int argc, const char **argv)
        repeat = mdcount * 16 / bytesperword;
 
        if (strcmp(argv[0], "mdr") == 0) {
-               if (argc != 2)
+               if (argc == 2 || (argc == 0 && last_addr != 0))
+                       valid = raw = 1;
+               else
                        return KDB_ARGCOUNT;
-               valid = 1;
        } else if (isdigit(argv[0][2])) {
                bytesperword = (int)(argv[0][2] - '0');
                if (bytesperword == 0) {
@@ -1613,7 +1626,10 @@ static int kdb_md(int argc, const char **argv)
                radix = last_radix;
                bytesperword = last_bytesperword;
                repeat = last_repeat;
-               mdcount = ((repeat * bytesperword) + 15) / 16;
+               if (raw)
+                       mdcount = repeat;
+               else
+                       mdcount = ((repeat * bytesperword) + 15) / 16;
        }
 
        if (argc) {
@@ -1630,7 +1646,10 @@ static int kdb_md(int argc, const char **argv)
                        diag = kdbgetularg(argv[nextarg], &val);
                        if (!diag) {
                                mdcount = (int) val;
-                               repeat = mdcount * 16 / bytesperword;
+                               if (raw)
+                                       repeat = mdcount;
+                               else
+                                       repeat = mdcount * 16 / bytesperword;
                        }
                }
                if (argc >= nextarg+1) {
@@ -1640,8 +1659,15 @@ static int kdb_md(int argc, const char **argv)
                }
        }
 
-       if (strcmp(argv[0], "mdr") == 0)
-               return kdb_mdr(addr, mdcount);
+       if (strcmp(argv[0], "mdr") == 0) {
+               int ret;
+               last_addr = addr;
+               ret = kdb_mdr(addr, mdcount);
+               last_addr += mdcount;
+               last_repeat = mdcount;
+               last_bytesperword = bytesperword; // to make REPEAT happy
+               return ret;
+       }
 
        switch (radix) {
        case 10:
@@ -2473,41 +2499,6 @@ static int kdb_kill(int argc, const char **argv)
        return 0;
 }
 
-struct kdb_tm {
-       int tm_sec;     /* seconds */
-       int tm_min;     /* minutes */
-       int tm_hour;    /* hours */
-       int tm_mday;    /* day of the month */
-       int tm_mon;     /* month */
-       int tm_year;    /* year */
-};
-
-static void kdb_gmtime(struct timespec *tv, struct kdb_tm *tm)
-{
-       /* This will work from 1970-2099, 2100 is not a leap year */
-       static int mon_day[] = { 31, 29, 31, 30, 31, 30, 31,
-                                31, 30, 31, 30, 31 };
-       memset(tm, 0, sizeof(*tm));
-       tm->tm_sec  = tv->tv_sec % (24 * 60 * 60);
-       tm->tm_mday = tv->tv_sec / (24 * 60 * 60) +
-               (2 * 365 + 1); /* shift base from 1970 to 1968 */
-       tm->tm_min =  tm->tm_sec / 60 % 60;
-       tm->tm_hour = tm->tm_sec / 60 / 60;
-       tm->tm_sec =  tm->tm_sec % 60;
-       tm->tm_year = 68 + 4*(tm->tm_mday / (4*365+1));
-       tm->tm_mday %= (4*365+1);
-       mon_day[1] = 29;
-       while (tm->tm_mday >= mon_day[tm->tm_mon]) {
-               tm->tm_mday -= mon_day[tm->tm_mon];
-               if (++tm->tm_mon == 12) {
-                       tm->tm_mon = 0;
-                       ++tm->tm_year;
-                       mon_day[1] = 28;
-               }
-       }
-       ++tm->tm_mday;
-}
-
 /*
  * Most of this code has been lifted from kernel/timer.c::sys_sysinfo().
  * I cannot call that code directly from kdb, it has an unconditional
@@ -2515,10 +2506,10 @@ static void kdb_gmtime(struct timespec *tv, struct kdb_tm *tm)
  */
 static void kdb_sysinfo(struct sysinfo *val)
 {
-       struct timespec uptime;
-       ktime_get_ts(&uptime);
+       u64 uptime = ktime_get_mono_fast_ns();
+
        memset(val, 0, sizeof(*val));
-       val->uptime = uptime.tv_sec;
+       val->uptime = div_u64(uptime, NSEC_PER_SEC);
        val->loads[0] = avenrun[0];
        val->loads[1] = avenrun[1];
        val->loads[2] = avenrun[2];
@@ -2533,8 +2524,8 @@ static void kdb_sysinfo(struct sysinfo *val)
  */
 static int kdb_summary(int argc, const char **argv)
 {
-       struct timespec now;
-       struct kdb_tm tm;
+       time64_t now;
+       struct tm tm;
        struct sysinfo val;
 
        if (argc)
@@ -2548,9 +2539,9 @@ static int kdb_summary(int argc, const char **argv)
        kdb_printf("domainname %s\n", init_uts_ns.name.domainname);
        kdb_printf("ccversion  %s\n", __stringify(CCVERSION));
 
-       now = __current_kernel_time();
-       kdb_gmtime(&now, &tm);
-       kdb_printf("date       %04d-%02d-%02d %02d:%02d:%02d "
+       now = __ktime_get_real_seconds();
+       time64_to_tm(now, 0, &tm);
+       kdb_printf("date       %04ld-%02d-%02d %02d:%02d:%02d "
                   "tz_minuteswest %d\n",
                1900+tm.tm_year, tm.tm_mon+1, tm.tm_mday,
                tm.tm_hour, tm.tm_min, tm.tm_sec,
index d35cc2d3a4cc08c548dad03dee074b9bedaa2da2..990b3cc526c80d2162d79f0524dbd83932418cdf 100644 (file)
@@ -129,13 +129,13 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab)
                }
                if (i >= ARRAY_SIZE(kdb_name_table)) {
                        debug_kfree(kdb_name_table[0]);
-                       memcpy(kdb_name_table, kdb_name_table+1,
+                       memmove(kdb_name_table, kdb_name_table+1,
                               sizeof(kdb_name_table[0]) *
                               (ARRAY_SIZE(kdb_name_table)-1));
                } else {
                        debug_kfree(knt1);
                        knt1 = kdb_name_table[i];
-                       memcpy(kdb_name_table+i, kdb_name_table+i+1,
+                       memmove(kdb_name_table+i, kdb_name_table+i+1,
                               sizeof(kdb_name_table[0]) *
                               (ARRAY_SIZE(kdb_name_table)-i-1));
                }
index 9d833d913c84d66a6e41ca5dc7ad3307e4b65ea9..42e48748855420af5fa2a0d45f04e54298546b9e 100644 (file)
@@ -34,7 +34,8 @@
 #define PANIC_BLINK_SPD 18
 
 int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE;
-static unsigned long tainted_mask;
+static unsigned long tainted_mask =
+       IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT) ? (1 << TAINT_RANDSTRUCT) : 0;
 static int pause_on_oops;
 static int pause_on_oops_flag;
 static DEFINE_SPINLOCK(pause_on_oops_lock);
@@ -308,52 +309,40 @@ EXPORT_SYMBOL(panic);
  * is being removed anyway.
  */
 const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = {
-       { 'P', 'G', true },     /* TAINT_PROPRIETARY_MODULE */
-       { 'F', ' ', true },     /* TAINT_FORCED_MODULE */
-       { 'S', ' ', false },    /* TAINT_CPU_OUT_OF_SPEC */
-       { 'R', ' ', false },    /* TAINT_FORCED_RMMOD */
-       { 'M', ' ', false },    /* TAINT_MACHINE_CHECK */
-       { 'B', ' ', false },    /* TAINT_BAD_PAGE */
-       { 'U', ' ', false },    /* TAINT_USER */
-       { 'D', ' ', false },    /* TAINT_DIE */
-       { 'A', ' ', false },    /* TAINT_OVERRIDDEN_ACPI_TABLE */
-       { 'W', ' ', false },    /* TAINT_WARN */
-       { 'C', ' ', true },     /* TAINT_CRAP */
-       { 'I', ' ', false },    /* TAINT_FIRMWARE_WORKAROUND */
-       { 'O', ' ', true },     /* TAINT_OOT_MODULE */
-       { 'E', ' ', true },     /* TAINT_UNSIGNED_MODULE */
-       { 'L', ' ', false },    /* TAINT_SOFTLOCKUP */
-       { 'K', ' ', true },     /* TAINT_LIVEPATCH */
-       { 'X', ' ', true },     /* TAINT_AUX */
+       [ TAINT_PROPRIETARY_MODULE ]    = { 'P', 'G', true },
+       [ TAINT_FORCED_MODULE ]         = { 'F', ' ', true },
+       [ TAINT_CPU_OUT_OF_SPEC ]       = { 'S', ' ', false },
+       [ TAINT_FORCED_RMMOD ]          = { 'R', ' ', false },
+       [ TAINT_MACHINE_CHECK ]         = { 'M', ' ', false },
+       [ TAINT_BAD_PAGE ]              = { 'B', ' ', false },
+       [ TAINT_USER ]                  = { 'U', ' ', false },
+       [ TAINT_DIE ]                   = { 'D', ' ', false },
+       [ TAINT_OVERRIDDEN_ACPI_TABLE ] = { 'A', ' ', false },
+       [ TAINT_WARN ]                  = { 'W', ' ', false },
+       [ TAINT_CRAP ]                  = { 'C', ' ', true },
+       [ TAINT_FIRMWARE_WORKAROUND ]   = { 'I', ' ', false },
+       [ TAINT_OOT_MODULE ]            = { 'O', ' ', true },
+       [ TAINT_UNSIGNED_MODULE ]       = { 'E', ' ', true },
+       [ TAINT_SOFTLOCKUP ]            = { 'L', ' ', false },
+       [ TAINT_LIVEPATCH ]             = { 'K', ' ', true },
+       [ TAINT_AUX ]                   = { 'X', ' ', true },
+       [ TAINT_RANDSTRUCT ]            = { 'T', ' ', true },
 };
 
 /**
- *     print_tainted - return a string to represent the kernel taint state.
+ * print_tainted - return a string to represent the kernel taint state.
  *
- *  'P' - Proprietary module has been loaded.
- *  'F' - Module has been forcibly loaded.
- *  'S' - SMP with CPUs not designed for SMP.
- *  'R' - User forced a module unload.
- *  'M' - System experienced a machine check exception.
- *  'B' - System has hit bad_page.
- *  'U' - Userspace-defined naughtiness.
- *  'D' - Kernel has oopsed before
- *  'A' - ACPI table overridden.
- *  'W' - Taint on warning.
- *  'C' - modules from drivers/staging are loaded.
- *  'I' - Working around severe firmware bug.
- *  'O' - Out-of-tree module has been loaded.
- *  'E' - Unsigned module has been loaded.
- *  'L' - A soft lockup has previously occurred.
- *  'K' - Kernel has been live patched.
- *  'X' - Auxiliary taint, for distros' use.
+ * For individual taint flag meanings, see Documentation/sysctl/kernel.txt
  *
- *     The string is overwritten by the next call to print_tainted().
+ * The string is overwritten by the next call to print_tainted(),
+ * but is always NULL terminated.
  */
 const char *print_tainted(void)
 {
        static char buf[TAINT_FLAGS_COUNT + sizeof("Tainted: ")];
 
+       BUILD_BUG_ON(ARRAY_SIZE(taint_flags) != TAINT_FLAGS_COUNT);
+
        if (tainted_mask) {
                char *s;
                int i;
@@ -554,6 +543,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
        else
                dump_stack();
 
+       print_irqtrace_events(current);
+
        print_oops_end_marker();
 
        /* Just a warning, don't kill lockdep. */
index cc9108c2a1fde209f5fe8b41e4cfca2e9e4d1310..ce89f757e6da04ab0f8ee97c66e4dbce1033dc94 100644 (file)
@@ -111,8 +111,8 @@ bool parameq(const char *a, const char *b)
 static void param_check_unsafe(const struct kernel_param *kp)
 {
        if (kp->flags & KERNEL_PARAM_FL_UNSAFE) {
-               pr_warn("Setting dangerous option %s - tainting kernel\n",
-                       kp->name);
+               pr_notice("Setting dangerous option %s - tainting kernel\n",
+                         kp->name);
                add_taint(TAINT_USER, LOCKDEP_STILL_OK);
        }
 }
index ed6c343fe50dff7b546e43fd89102f43b898417e..157fe4b199713b65cb62c6d591225306cd5289f9 100644 (file)
@@ -70,7 +70,7 @@ int pid_max_max = PID_MAX_LIMIT;
  */
 struct pid_namespace init_pid_ns = {
        .kref = KREF_INIT(2),
-       .idr = IDR_INIT,
+       .idr = IDR_INIT(init_pid_ns.idr),
        .pid_allocated = PIDNS_ADDING,
        .level = 0,
        .child_reaper = &init_task,
index 9d7503910ce220302c29a9ea4f3ba0e2fab8ef26..fa39092b7aea2c2d715fe47b170d76230f549ba1 100644 (file)
@@ -295,6 +295,7 @@ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,
                 * changed
                 */
                plist_del(node, &c->list);
+               /* fall through */
        case PM_QOS_ADD_REQ:
                plist_node_init(node, new_value);
                plist_add(node, &c->list);
@@ -367,6 +368,7 @@ bool pm_qos_update_flags(struct pm_qos_flags *pqf,
                break;
        case PM_QOS_UPDATE_REQ:
                pm_qos_flags_remove_req(pqf, req);
+               /* fall through */
        case PM_QOS_ADD_REQ:
                req->flags = val;
                INIT_LIST_HEAD(&req->node);
index 704e55129c3af254210be32ca2396f74a9e937be..2f4af216bd6ef97f31b6ffcd0805a85ecfa652ee 100644 (file)
@@ -51,6 +51,7 @@
 #include <linux/uaccess.h>
 #include <asm/sections.h>
 
+#include <trace/events/initcall.h>
 #define CREATE_TRACE_POINTS
 #include <trace/events/printk.h>
 
@@ -2780,6 +2781,7 @@ EXPORT_SYMBOL(unregister_console);
  */
 void __init console_init(void)
 {
+       int ret;
        initcall_t *call;
 
        /* Setup the default TTY line discipline. */
@@ -2790,8 +2792,11 @@ void __init console_init(void)
         * inform about problems etc..
         */
        call = __con_initcall_start;
+       trace_initcall_level("console");
        while (call < __con_initcall_end) {
-               (*call)();
+               trace_initcall_start((*call));
+               ret = (*call)();
+               trace_initcall_finish((*call), ret);
                call++;
        }
 }
index 2975f195e1c40427acc33f938a3c9c0174ab5db2..1a3e9bddd17b67955f5f0446310a090872d7eacf 100644 (file)
@@ -141,13 +141,15 @@ static void cpuidle_idle_call(void)
        }
 
        /*
-        * Tell the RCU framework we are entering an idle section,
-        * so no more rcu read side critical sections and one more
+        * The RCU framework needs to be told that we are entering an idle
+        * section, so no more rcu read side critical sections and one more
         * step to the grace period
         */
-       rcu_idle_enter();
 
        if (cpuidle_not_available(drv, dev)) {
+               tick_nohz_idle_stop_tick();
+               rcu_idle_enter();
+
                default_idle_call();
                goto exit_idle;
        }
@@ -164,20 +166,37 @@ static void cpuidle_idle_call(void)
 
        if (idle_should_enter_s2idle() || dev->use_deepest_state) {
                if (idle_should_enter_s2idle()) {
+                       rcu_idle_enter();
+
                        entered_state = cpuidle_enter_s2idle(drv, dev);
                        if (entered_state > 0) {
                                local_irq_enable();
                                goto exit_idle;
                        }
+
+                       rcu_idle_exit();
                }
 
+               tick_nohz_idle_stop_tick();
+               rcu_idle_enter();
+
                next_state = cpuidle_find_deepest_state(drv, dev);
                call_cpuidle(drv, dev, next_state);
        } else {
+               bool stop_tick = true;
+
                /*
                 * Ask the cpuidle framework to choose a convenient idle state.
                 */
-               next_state = cpuidle_select(drv, dev);
+               next_state = cpuidle_select(drv, dev, &stop_tick);
+
+               if (stop_tick)
+                       tick_nohz_idle_stop_tick();
+               else
+                       tick_nohz_idle_retain_tick();
+
+               rcu_idle_enter();
+
                entered_state = call_cpuidle(drv, dev, next_state);
                /*
                 * Give the governor an opportunity to reflect on the outcome
@@ -222,6 +241,7 @@ static void do_idle(void)
                rmb();
 
                if (cpu_is_offline(cpu)) {
+                       tick_nohz_idle_stop_tick_protected();
                        cpuhp_report_idle_dead();
                        arch_cpu_idle_dead();
                }
@@ -235,10 +255,12 @@ static void do_idle(void)
                 * broadcast device expired for us, we don't want to go deep
                 * idle as we know that the IPI is going to arrive right away.
                 */
-               if (cpu_idle_force_poll || tick_check_broadcast_expired())
+               if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
+                       tick_nohz_idle_restart_tick();
                        cpu_idle_poll();
-               else
+               } else {
                        cpuidle_idle_call();
+               }
                arch_cpu_idle_exit();
        }
 
index bdf7090b106dadb817ad924b09a2cafcc4fc26de..6a78cf70761db3c436316f8e3f7453d20962dc08 100644 (file)
@@ -1340,7 +1340,7 @@ static struct ctl_table vm_table[] = {
        {
                .procname       = "dirtytime_expire_seconds",
                .data           = &dirtytime_expire_interval,
-               .maxlen         = sizeof(dirty_expire_interval),
+               .maxlen         = sizeof(dirtytime_expire_interval),
                .mode           = 0644,
                .proc_handler   = dirtytime_interval_handler,
                .extra1         = &zero,
@@ -2511,6 +2511,15 @@ static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
 }
 #endif
 
+/**
+ * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
+ * @min: pointer to minimum allowable value
+ * @max: pointer to maximum allowable value
+ *
+ * The do_proc_dointvec_minmax_conv_param structure provides the
+ * minimum and maximum values for doing range checking for those sysctl
+ * parameters that use the proc_dointvec_minmax() handler.
+ */
 struct do_proc_dointvec_minmax_conv_param {
        int *min;
        int *max;
@@ -2554,7 +2563,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
  * This routine will ensure the values are within the range specified by
  * table->extra1 (min) and table->extra2 (max).
  *
- * Returns 0 on success.
+ * Returns 0 on success or -EINVAL on write when the range check fails.
  */
 int proc_dointvec_minmax(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -2567,6 +2576,15 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
                                do_proc_dointvec_minmax_conv, &param);
 }
 
+/**
+ * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
+ * @min: pointer to minimum allowable value
+ * @max: pointer to maximum allowable value
+ *
+ * The do_proc_douintvec_minmax_conv_param structure provides the
+ * minimum and maximum values for doing range checking for those sysctl
+ * parameters that use the proc_douintvec_minmax() handler.
+ */
 struct do_proc_douintvec_minmax_conv_param {
        unsigned int *min;
        unsigned int *max;
@@ -2614,7 +2632,7 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
  * check for UINT_MAX to avoid having to support wrap around uses from
  * userspace.
  *
- * Returns 0 on success.
+ * Returns 0 on success or -ERANGE on write when the range check fails.
  */
 int proc_douintvec_minmax(struct ctl_table *table, int write,
                          void __user *buffer, size_t *lenp, loff_t *ppos)
index 9b082ce86325feb8cd6cc423f81b78198c3a80bd..eda1210ce50f88dd88a53ab9fafbfeef5da021ca 100644 (file)
@@ -480,6 +480,7 @@ __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active)
        while ((base = __next_base((cpu_base), &(active))))
 
 static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
+                                        const struct hrtimer *exclude,
                                         unsigned int active,
                                         ktime_t expires_next)
 {
@@ -492,9 +493,22 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
 
                next = timerqueue_getnext(&base->active);
                timer = container_of(next, struct hrtimer, node);
+               if (timer == exclude) {
+                       /* Get to the next timer in the queue. */
+                       next = timerqueue_iterate_next(next);
+                       if (!next)
+                               continue;
+
+                       timer = container_of(next, struct hrtimer, node);
+               }
                expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
                if (expires < expires_next) {
                        expires_next = expires;
+
+                       /* Skip cpu_base update if a timer is being excluded. */
+                       if (exclude)
+                               continue;
+
                        if (timer->is_soft)
                                cpu_base->softirq_next_timer = timer;
                        else
@@ -538,7 +552,8 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_
        if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
                active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
                cpu_base->softirq_next_timer = NULL;
-               expires_next = __hrtimer_next_event_base(cpu_base, active, KTIME_MAX);
+               expires_next = __hrtimer_next_event_base(cpu_base, NULL,
+                                                        active, KTIME_MAX);
 
                next_timer = cpu_base->softirq_next_timer;
        }
@@ -546,7 +561,8 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_
        if (active_mask & HRTIMER_ACTIVE_HARD) {
                active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
                cpu_base->next_timer = next_timer;
-               expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
+               expires_next = __hrtimer_next_event_base(cpu_base, NULL, active,
+                                                        expires_next);
        }
 
        return expires_next;
@@ -1190,6 +1206,39 @@ u64 hrtimer_get_next_event(void)
 
        return expires;
 }
+
+/**
+ * hrtimer_next_event_without - time until next expiry event w/o one timer
+ * @exclude:   timer to exclude
+ *
+ * Returns the next expiry time over all timers except for the @exclude one or
+ * KTIME_MAX if none of them is pending.
+ */
+u64 hrtimer_next_event_without(const struct hrtimer *exclude)
+{
+       struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+       u64 expires = KTIME_MAX;
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&cpu_base->lock, flags);
+
+       if (__hrtimer_hres_active(cpu_base)) {
+               unsigned int active;
+
+               if (!cpu_base->softirq_activated) {
+                       active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
+                       expires = __hrtimer_next_event_base(cpu_base, exclude,
+                                                           active, KTIME_MAX);
+               }
+               active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
+               expires = __hrtimer_next_event_base(cpu_base, exclude, active,
+                                                   expires);
+       }
+
+       raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+
+       return expires;
+}
 #endif
 
 static inline int hrtimer_clockid_to_base(clockid_t clock_id)
index 8d70da1b9a0d21bb8057eacd47362f45314c254a..a09ded765f6c50f6c0d9ba46c30644b539d1472a 100644 (file)
@@ -31,7 +31,7 @@
 
 
 /* USER_HZ period (usecs): */
-unsigned long                  tick_usec = TICK_USEC;
+unsigned long                  tick_usec = USER_TICK_USEC;
 
 /* SHIFTED_HZ period (nsecs): */
 unsigned long                  tick_nsec;
index f3ab08caa2c3aa6bc61dbe1415b16f783b6ae18f..646645e981f942480d49857fc84ef2d5bcd14791 100644 (file)
@@ -122,8 +122,7 @@ static ktime_t tick_init_jiffy_update(void)
        return period;
 }
 
-
-static void tick_sched_do_timer(ktime_t now)
+static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
 {
        int cpu = smp_processor_id();
 
@@ -143,6 +142,9 @@ static void tick_sched_do_timer(ktime_t now)
        /* Check, if the jiffies need an update */
        if (tick_do_timer_cpu == cpu)
                tick_do_update_jiffies64(now);
+
+       if (ts->inidle)
+               ts->got_idle_tick = 1;
 }
 
 static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
@@ -474,7 +476,9 @@ __setup("nohz=", setup_tick_nohz);
 
 bool tick_nohz_tick_stopped(void)
 {
-       return __this_cpu_read(tick_cpu_sched.tick_stopped);
+       struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+
+       return ts->tick_stopped;
 }
 
 bool tick_nohz_tick_stopped_cpu(int cpu)
@@ -537,14 +541,11 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
        sched_clock_idle_wakeup_event();
 }
 
-static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
+static void tick_nohz_start_idle(struct tick_sched *ts)
 {
-       ktime_t now = ktime_get();
-
-       ts->idle_entrytime = now;
+       ts->idle_entrytime = ktime_get();
        ts->idle_active = 1;
        sched_clock_idle_sleep_event();
-       return now;
 }
 
 /**
@@ -653,13 +654,10 @@ static inline bool local_timer_softirq_pending(void)
        return local_softirq_pending() & TIMER_SOFTIRQ;
 }
 
-static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
-                                        ktime_t now, int cpu)
+static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 {
-       struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
        u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
        unsigned long seq, basejiff;
-       ktime_t tick;
 
        /* Read jiffies and the time when jiffies were updated last */
        do {
@@ -668,6 +666,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
                basejiff = jiffies;
        } while (read_seqretry(&jiffies_lock, seq));
        ts->last_jiffies = basejiff;
+       ts->timer_expires_base = basemono;
 
        /*
         * Keep the periodic tick, when RCU, architecture or irq_work
@@ -712,47 +711,63 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
                 * next period, so no point in stopping it either, bail.
                 */
                if (!ts->tick_stopped) {
-                       tick = 0;
+                       ts->timer_expires = 0;
                        goto out;
                }
        }
 
+       /*
+        * If this CPU is the one which had the do_timer() duty last, we limit
+        * the sleep time to the timekeeping max_deferment value.
+        * Otherwise we can sleep as long as we want.
+        */
+       delta = timekeeping_max_deferment();
+       if (cpu != tick_do_timer_cpu &&
+           (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
+               delta = KTIME_MAX;
+
+       /* Calculate the next expiry time */
+       if (delta < (KTIME_MAX - basemono))
+               expires = basemono + delta;
+       else
+               expires = KTIME_MAX;
+
+       ts->timer_expires = min_t(u64, expires, next_tick);
+
+out:
+       return ts->timer_expires;
+}
+
+static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
+{
+       struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
+       u64 basemono = ts->timer_expires_base;
+       u64 expires = ts->timer_expires;
+       ktime_t tick = expires;
+
+       /* Make sure we won't be trying to stop it twice in a row. */
+       ts->timer_expires_base = 0;
+
        /*
         * If this CPU is the one which updates jiffies, then give up
         * the assignment and let it be taken by the CPU which runs
         * the tick timer next, which might be this CPU as well. If we
         * don't drop this here the jiffies might be stale and
         * do_timer() never invoked. Keep track of the fact that it
-        * was the one which had the do_timer() duty last. If this CPU
-        * is the one which had the do_timer() duty last, we limit the
-        * sleep time to the timekeeping max_deferment value.
-        * Otherwise we can sleep as long as we want.
+        * was the one which had the do_timer() duty last.
         */
-       delta = timekeeping_max_deferment();
        if (cpu == tick_do_timer_cpu) {
                tick_do_timer_cpu = TICK_DO_TIMER_NONE;
                ts->do_timer_last = 1;
        } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
-               delta = KTIME_MAX;
                ts->do_timer_last = 0;
-       } else if (!ts->do_timer_last) {
-               delta = KTIME_MAX;
        }
 
-       /* Calculate the next expiry time */
-       if (delta < (KTIME_MAX - basemono))
-               expires = basemono + delta;
-       else
-               expires = KTIME_MAX;
-
-       expires = min_t(u64, expires, next_tick);
-       tick = expires;
-
        /* Skip reprogram of event if its not changed */
        if (ts->tick_stopped && (expires == ts->next_tick)) {
                /* Sanity check: make sure clockevent is actually programmed */
                if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
-                       goto out;
+                       return;
 
                WARN_ON_ONCE(1);
                printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
@@ -786,7 +801,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
        if (unlikely(expires == KTIME_MAX)) {
                if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
                        hrtimer_cancel(&ts->sched_timer);
-               goto out;
+               return;
        }
 
        hrtimer_set_expires(&ts->sched_timer, tick);
@@ -795,15 +810,23 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
                hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
        else
                tick_program_event(tick, 1);
-out:
-       /*
-        * Update the estimated sleep length until the next timer
-        * (not only the tick).
-        */
-       ts->sleep_length = ktime_sub(dev->next_event, now);
-       return tick;
 }
 
+static void tick_nohz_retain_tick(struct tick_sched *ts)
+{
+       ts->timer_expires_base = 0;
+}
+
+#ifdef CONFIG_NO_HZ_FULL
+static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu)
+{
+       if (tick_nohz_next_event(ts, cpu))
+               tick_nohz_stop_tick(ts, cpu);
+       else
+               tick_nohz_retain_tick(ts);
+}
+#endif /* CONFIG_NO_HZ_FULL */
+
 static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 {
        /* Update jiffies first */
@@ -839,7 +862,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
                return;
 
        if (can_stop_full_tick(cpu, ts))
-               tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
+               tick_nohz_stop_sched_tick(ts, cpu);
        else if (ts->tick_stopped)
                tick_nohz_restart_sched_tick(ts, ktime_get());
 #endif
@@ -865,10 +888,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
                return false;
        }
 
-       if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) {
-               ts->sleep_length = NSEC_PER_SEC / HZ;
+       if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
                return false;
-       }
 
        if (need_resched())
                return false;
@@ -903,42 +924,65 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
        return true;
 }
 
-static void __tick_nohz_idle_enter(struct tick_sched *ts)
+static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
 {
-       ktime_t now, expires;
+       ktime_t expires;
        int cpu = smp_processor_id();
 
-       now = tick_nohz_start_idle(ts);
+       /*
+        * If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the
+        * tick timer expiration time is known already.
+        */
+       if (ts->timer_expires_base)
+               expires = ts->timer_expires;
+       else if (can_stop_idle_tick(cpu, ts))
+               expires = tick_nohz_next_event(ts, cpu);
+       else
+               return;
+
+       ts->idle_calls++;
 
-       if (can_stop_idle_tick(cpu, ts)) {
+       if (expires > 0LL) {
                int was_stopped = ts->tick_stopped;
 
-               ts->idle_calls++;
+               tick_nohz_stop_tick(ts, cpu);
 
-               expires = tick_nohz_stop_sched_tick(ts, now, cpu);
-               if (expires > 0LL) {
-                       ts->idle_sleeps++;
-                       ts->idle_expires = expires;
-               }
+               ts->idle_sleeps++;
+               ts->idle_expires = expires;
 
                if (!was_stopped && ts->tick_stopped) {
                        ts->idle_jiffies = ts->last_jiffies;
                        nohz_balance_enter_idle(cpu);
                }
+       } else {
+               tick_nohz_retain_tick(ts);
        }
 }
 
 /**
- * tick_nohz_idle_enter - stop the idle tick from the idle task
+ * tick_nohz_idle_stop_tick - stop the idle tick from the idle task
  *
  * When the next event is more than a tick into the future, stop the idle tick
- * Called when we start the idle loop.
- *
- * The arch is responsible of calling:
+ */
+void tick_nohz_idle_stop_tick(void)
+{
+       __tick_nohz_idle_stop_tick(this_cpu_ptr(&tick_cpu_sched));
+}
+
+void tick_nohz_idle_retain_tick(void)
+{
+       tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
+       /*
+        * Undo the effect of get_next_timer_interrupt() called from
+        * tick_nohz_next_event().
+        */
+       timer_clear_idle();
+}
+
+/**
+ * tick_nohz_idle_enter - prepare for entering idle on the current CPU
  *
- * - rcu_idle_enter() after its last use of RCU before the CPU is put
- *  to sleep.
- * - rcu_idle_exit() before the first use of RCU after the CPU is woken up.
+ * Called when we start the idle loop.
  */
 void tick_nohz_idle_enter(void)
 {
@@ -949,8 +993,11 @@ void tick_nohz_idle_enter(void)
        local_irq_disable();
 
        ts = this_cpu_ptr(&tick_cpu_sched);
+
+       WARN_ON_ONCE(ts->timer_expires_base);
+
        ts->inidle = 1;
-       __tick_nohz_idle_enter(ts);
+       tick_nohz_start_idle(ts);
 
        local_irq_enable();
 }
@@ -968,21 +1015,62 @@ void tick_nohz_irq_exit(void)
        struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
 
        if (ts->inidle)
-               __tick_nohz_idle_enter(ts);
+               tick_nohz_start_idle(ts);
        else
                tick_nohz_full_update_tick(ts);
 }
 
 /**
- * tick_nohz_get_sleep_length - return the length of the current sleep
+ * tick_nohz_idle_got_tick - Check whether or not the tick handler has run
+ */
+bool tick_nohz_idle_got_tick(void)
+{
+       struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+
+       if (ts->got_idle_tick) {
+               ts->got_idle_tick = 0;
+               return true;
+       }
+       return false;
+}
+
+/**
+ * tick_nohz_get_sleep_length - return the expected length of the current sleep
+ * @delta_next: duration until the next event if the tick cannot be stopped
  *
  * Called from power state control code with interrupts disabled
  */
-ktime_t tick_nohz_get_sleep_length(void)
+ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
 {
+       struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
        struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+       int cpu = smp_processor_id();
+       /*
+        * The idle entry time is expected to be a sufficient approximation of
+        * the current time at this point.
+        */
+       ktime_t now = ts->idle_entrytime;
+       ktime_t next_event;
+
+       WARN_ON_ONCE(!ts->inidle);
+
+       *delta_next = ktime_sub(dev->next_event, now);
+
+       if (!can_stop_idle_tick(cpu, ts))
+               return *delta_next;
+
+       next_event = tick_nohz_next_event(ts, cpu);
+       if (!next_event)
+               return *delta_next;
+
+       /*
+        * If the next highres timer to expire is earlier than next_event, the
+        * idle governor needs to know that.
+        */
+       next_event = min_t(u64, next_event,
+                          hrtimer_next_event_without(&ts->sched_timer));
 
-       return ts->sleep_length;
+       return ktime_sub(next_event, now);
 }
 
 /**
@@ -1031,6 +1119,20 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
 #endif
 }
 
+static void __tick_nohz_idle_restart_tick(struct tick_sched *ts, ktime_t now)
+{
+       tick_nohz_restart_sched_tick(ts, now);
+       tick_nohz_account_idle_ticks(ts);
+}
+
+void tick_nohz_idle_restart_tick(void)
+{
+       struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+
+       if (ts->tick_stopped)
+               __tick_nohz_idle_restart_tick(ts, ktime_get());
+}
+
 /**
  * tick_nohz_idle_exit - restart the idle tick from the idle task
  *
@@ -1041,24 +1143,26 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
 void tick_nohz_idle_exit(void)
 {
        struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+       bool idle_active, tick_stopped;
        ktime_t now;
 
        local_irq_disable();
 
        WARN_ON_ONCE(!ts->inidle);
+       WARN_ON_ONCE(ts->timer_expires_base);
 
        ts->inidle = 0;
+       idle_active = ts->idle_active;
+       tick_stopped = ts->tick_stopped;
 
-       if (ts->idle_active || ts->tick_stopped)
+       if (idle_active || tick_stopped)
                now = ktime_get();
 
-       if (ts->idle_active)
+       if (idle_active)
                tick_nohz_stop_idle(ts, now);
 
-       if (ts->tick_stopped) {
-               tick_nohz_restart_sched_tick(ts, now);
-               tick_nohz_account_idle_ticks(ts);
-       }
+       if (tick_stopped)
+               __tick_nohz_idle_restart_tick(ts, now);
 
        local_irq_enable();
 }
@@ -1074,7 +1178,7 @@ static void tick_nohz_handler(struct clock_event_device *dev)
 
        dev->next_event = KTIME_MAX;
 
-       tick_sched_do_timer(now);
+       tick_sched_do_timer(ts, now);
        tick_sched_handle(ts, regs);
 
        /* No need to reprogram if we are running tickless  */
@@ -1169,7 +1273,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
        struct pt_regs *regs = get_irq_regs();
        ktime_t now = ktime_get();
 
-       tick_sched_do_timer(now);
+       tick_sched_do_timer(ts, now);
 
        /*
         * Do not call, when we are not in irq context and have
index 954b43dbf21cb7f64f31bb6a849a0654a63620a5..6de959a854b2c78deac9b2682791ccb5fe436379 100644 (file)
@@ -38,31 +38,37 @@ enum tick_nohz_mode {
  * @idle_exittime:     Time when the idle state was left
  * @idle_sleeptime:    Sum of the time slept in idle with sched tick stopped
  * @iowait_sleeptime:  Sum of the time slept in idle with sched tick stopped, with IO outstanding
- * @sleep_length:      Duration of the current idle sleep
+ * @timer_expires:     Anticipated timer expiration time (in case sched tick is stopped)
+ * @timer_expires_base:        Base time clock monotonic for @timer_expires
  * @do_timer_lst:      CPU was the last one doing do_timer before going idle
+ * @got_idle_tick:     Tick timer function has run with @inidle set
  */
 struct tick_sched {
        struct hrtimer                  sched_timer;
        unsigned long                   check_clocks;
        enum tick_nohz_mode             nohz_mode;
+
+       unsigned int                    inidle          : 1;
+       unsigned int                    tick_stopped    : 1;
+       unsigned int                    idle_active     : 1;
+       unsigned int                    do_timer_last   : 1;
+       unsigned int                    got_idle_tick   : 1;
+
        ktime_t                         last_tick;
        ktime_t                         next_tick;
-       int                             inidle;
-       int                             tick_stopped;
        unsigned long                   idle_jiffies;
        unsigned long                   idle_calls;
        unsigned long                   idle_sleeps;
-       int                             idle_active;
        ktime_t                         idle_entrytime;
        ktime_t                         idle_waketime;
        ktime_t                         idle_exittime;
        ktime_t                         idle_sleeptime;
        ktime_t                         iowait_sleeptime;
-       ktime_t                         sleep_length;
        unsigned long                   last_jiffies;
+       u64                             timer_expires;
+       u64                             timer_expires_base;
        u64                             next_timer;
        ktime_t                         idle_expires;
-       int                             do_timer_last;
        atomic_t                        tick_dep_mask;
 };
 
index fdbeeb02dde9fcfc40640648df6eecb910cf95e2..cf5c0828ee3157bf620561f39838edd6c144e140 100644 (file)
@@ -31,6 +31,4 @@ static inline u64 clocksource_delta(u64 now, u64 last, u64 mask)
 }
 #endif
 
-extern time64_t __ktime_get_real_seconds(void);
-
 #endif /* _TIMEKEEPING_INTERNAL_H */
index 0b249e2f0c3c286a519db61cd9a05df91307a263..c4f0f2e4126e4dedf8d7f1b0a7974092f982c72f 100644 (file)
@@ -606,7 +606,10 @@ config HIST_TRIGGERS
          event activity as an initial guide for further investigation
          using more advanced tools.
 
-         See Documentation/trace/events.txt.
+         Inter-event tracing of quantities such as latencies is also
+         supported using hist triggers under this option.
+
+         See Documentation/trace/histogram.txt.
          If in doubt, say N.
 
 config MMIOTRACE_TEST
index eac9ce2c57a2ede6d56bda9985e0d06470babd76..16bbf062018fa79af48db2339db8ad1c094b3e85 100644 (file)
@@ -3902,14 +3902,13 @@ static bool module_exists(const char *module)
 {
        /* All modules have the symbol __this_module */
        const char this_mod[] = "__this_module";
-       const int modname_size = MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 1;
-       char modname[modname_size + 1];
+       char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2];
        unsigned long val;
        int n;
 
-       n = snprintf(modname, modname_size + 1, "%s:%s", module, this_mod);
+       n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
 
-       if (n > modname_size)
+       if (n > sizeof(modname) - 1)
                return false;
 
        val = module_kallsyms_lookup_name(modname);
index dcf1c4dd3efe6954b7d030b986f43bd0a897f052..c9cb9767d49b915e28d6eb50187d4d6805cea564 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/hash.h>
 #include <linux/list.h>
 #include <linux/cpu.h>
+#include <linux/oom.h>
 
 #include <asm/local.h>
 
@@ -41,6 +42,8 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
                         RINGBUF_TYPE_PADDING);
        trace_seq_printf(s, "\ttime_extend : type == %d\n",
                         RINGBUF_TYPE_TIME_EXTEND);
+       trace_seq_printf(s, "\ttime_stamp : type == %d\n",
+                        RINGBUF_TYPE_TIME_STAMP);
        trace_seq_printf(s, "\tdata max type_len  == %d\n",
                         RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
 
@@ -140,12 +143,15 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
 
 enum {
        RB_LEN_TIME_EXTEND = 8,
-       RB_LEN_TIME_STAMP = 16,
+       RB_LEN_TIME_STAMP =  8,
 };
 
 #define skip_time_extend(event) \
        ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
 
+#define extended_time(event) \
+       (event->type_len >= RINGBUF_TYPE_TIME_EXTEND)
+
 static inline int rb_null_event(struct ring_buffer_event *event)
 {
        return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@ -209,7 +215,7 @@ rb_event_ts_length(struct ring_buffer_event *event)
 {
        unsigned len = 0;
 
-       if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+       if (extended_time(event)) {
                /* time extends include the data event after it */
                len = RB_LEN_TIME_EXTEND;
                event = skip_time_extend(event);
@@ -231,7 +237,7 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 {
        unsigned length;
 
-       if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+       if (extended_time(event))
                event = skip_time_extend(event);
 
        length = rb_event_length(event);
@@ -248,7 +254,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
 static __always_inline void *
 rb_event_data(struct ring_buffer_event *event)
 {
-       if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+       if (extended_time(event))
                event = skip_time_extend(event);
        BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
        /* If length is in len field, then array[0] has the data */
@@ -275,6 +281,27 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 #define TS_MASK                ((1ULL << TS_SHIFT) - 1)
 #define TS_DELTA_TEST  (~TS_MASK)
 
+/**
+ * ring_buffer_event_time_stamp - return the event's extended timestamp
+ * @event: the event to get the timestamp of
+ *
+ * Returns the extended timestamp associated with a data event.
+ * An extended time_stamp is a 64-bit timestamp represented
+ * internally in a special way that makes the best use of space
+ * contained within a ring buffer event.  This function decodes
+ * it and maps it to a straight u64 value.
+ */
+u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event)
+{
+       u64 ts;
+
+       ts = event->array[0];
+       ts <<= TS_SHIFT;
+       ts += event->time_delta;
+
+       return ts;
+}
+
 /* Flag when events were overwritten */
 #define RB_MISSED_EVENTS       (1 << 31)
 /* Missed count stored at end */
@@ -451,6 +478,7 @@ struct ring_buffer_per_cpu {
        struct buffer_page              *reader_page;
        unsigned long                   lost_events;
        unsigned long                   last_overrun;
+       unsigned long                   nest;
        local_t                         entries_bytes;
        local_t                         entries;
        local_t                         overrun;
@@ -488,6 +516,7 @@ struct ring_buffer {
        u64                             (*clock)(void);
 
        struct rb_irq_work              irq_work;
+       bool                            time_stamp_abs;
 };
 
 struct ring_buffer_iter {
@@ -1134,30 +1163,60 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
 static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
 {
        struct buffer_page *bpage, *tmp;
+       bool user_thread = current->mm != NULL;
+       gfp_t mflags;
        long i;
 
+       /*
+        * Check if the available memory is there first.
+        * Note, si_mem_available() only gives us a rough estimate of available
+        * memory. It may not be accurate. But we don't care, we just want
+        * to prevent doing any allocation when it is obvious that it is
+        * not going to succeed.
+        */
+       i = si_mem_available();
+       if (i < nr_pages)
+               return -ENOMEM;
+
+       /*
+        * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails
+        * gracefully without invoking oom-killer and the system is not
+        * destabilized.
+        */
+       mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL;
+
+       /*
+        * If a user thread allocates too much, and si_mem_available()
+        * reports there's enough memory, even though there is not.
+        * Make sure the OOM killer kills this thread. This can happen
+        * even with RETRY_MAYFAIL because another task may be doing
+        * an allocation after this task has taken all memory.
+        * This is the task the OOM killer needs to take out during this
+        * loop, even if it was triggered by an allocation somewhere else.
+        */
+       if (user_thread)
+               set_current_oom_origin();
        for (i = 0; i < nr_pages; i++) {
                struct page *page;
-               /*
-                * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails
-                * gracefully without invoking oom-killer and the system is not
-                * destabilized.
-                */
+
                bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
-                                   GFP_KERNEL | __GFP_RETRY_MAYFAIL,
-                                   cpu_to_node(cpu));
+                                   mflags, cpu_to_node(cpu));
                if (!bpage)
                        goto free_pages;
 
                list_add(&bpage->list, pages);
 
-               page = alloc_pages_node(cpu_to_node(cpu),
-                                       GFP_KERNEL | __GFP_RETRY_MAYFAIL, 0);
+               page = alloc_pages_node(cpu_to_node(cpu), mflags, 0);
                if (!page)
                        goto free_pages;
                bpage->page = page_address(page);
                rb_init_page(bpage->page);
+
+               if (user_thread && fatal_signal_pending(current))
+                       goto free_pages;
        }
+       if (user_thread)
+               clear_current_oom_origin();
 
        return 0;
 
@@ -1166,6 +1225,8 @@ free_pages:
                list_del_init(&bpage->list);
                free_buffer_page(bpage);
        }
+       if (user_thread)
+               clear_current_oom_origin();
 
        return -ENOMEM;
 }
@@ -1382,6 +1443,16 @@ void ring_buffer_set_clock(struct ring_buffer *buffer,
        buffer->clock = clock;
 }
 
+void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs)
+{
+       buffer->time_stamp_abs = abs;
+}
+
+bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer)
+{
+       return buffer->time_stamp_abs;
+}
+
 static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
 
 static inline unsigned long rb_page_entries(struct buffer_page *bpage)
@@ -2206,12 +2277,15 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 
 /* Slow path, do not inline */
 static noinline struct ring_buffer_event *
-rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
+rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs)
 {
-       event->type_len = RINGBUF_TYPE_TIME_EXTEND;
+       if (abs)
+               event->type_len = RINGBUF_TYPE_TIME_STAMP;
+       else
+               event->type_len = RINGBUF_TYPE_TIME_EXTEND;
 
-       /* Not the first event on the page? */
-       if (rb_event_index(event)) {
+       /* Not the first event on the page, or not delta? */
+       if (abs || rb_event_index(event)) {
                event->time_delta = delta & TS_MASK;
                event->array[0] = delta >> TS_SHIFT;
        } else {
@@ -2254,7 +2328,9 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
         * add it to the start of the resevered space.
         */
        if (unlikely(info->add_timestamp)) {
-               event = rb_add_time_stamp(event, delta);
+               bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer);
+
+               event = rb_add_time_stamp(event, info->delta, abs);
                length -= RB_LEN_TIME_EXTEND;
                delta = 0;
        }
@@ -2442,7 +2518,7 @@ static __always_inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer
 
 static inline void rb_event_discard(struct ring_buffer_event *event)
 {
-       if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+       if (extended_time(event))
                event = skip_time_extend(event);
 
        /* array[0] holds the actual length for the discarded event */
@@ -2486,10 +2562,11 @@ rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
                        cpu_buffer->write_stamp =
                                cpu_buffer->commit_page->page->time_stamp;
                else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
-                       delta = event->array[0];
-                       delta <<= TS_SHIFT;
-                       delta += event->time_delta;
+                       delta = ring_buffer_event_time_stamp(event);
                        cpu_buffer->write_stamp += delta;
+               } else if (event->type_len == RINGBUF_TYPE_TIME_STAMP) {
+                       delta = ring_buffer_event_time_stamp(event);
+                       cpu_buffer->write_stamp = delta;
                } else
                        cpu_buffer->write_stamp += event->time_delta;
        }
@@ -2581,10 +2658,10 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
                bit = pc & NMI_MASK ? RB_CTX_NMI :
                        pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
 
-       if (unlikely(val & (1 << bit)))
+       if (unlikely(val & (1 << (bit + cpu_buffer->nest))))
                return 1;
 
-       val |= (1 << bit);
+       val |= (1 << (bit + cpu_buffer->nest));
        cpu_buffer->current_context = val;
 
        return 0;
@@ -2593,7 +2670,57 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 static __always_inline void
 trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
 {
-       cpu_buffer->current_context &= cpu_buffer->current_context - 1;
+       cpu_buffer->current_context &=
+               cpu_buffer->current_context - (1 << cpu_buffer->nest);
+}
+
+/* The recursive locking above uses 4 bits */
+#define NESTED_BITS 4
+
+/**
+ * ring_buffer_nest_start - Allow to trace while nested
+ * @buffer: The ring buffer to modify
+ *
+ * The ring buffer has a safty mechanism to prevent recursion.
+ * But there may be a case where a trace needs to be done while
+ * tracing something else. In this case, calling this function
+ * will allow this function to nest within a currently active
+ * ring_buffer_lock_reserve().
+ *
+ * Call this function before calling another ring_buffer_lock_reserve() and
+ * call ring_buffer_nest_end() after the nested ring_buffer_unlock_commit().
+ */
+void ring_buffer_nest_start(struct ring_buffer *buffer)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+       int cpu;
+
+       /* Enabled by ring_buffer_nest_end() */
+       preempt_disable_notrace();
+       cpu = raw_smp_processor_id();
+       cpu_buffer = buffer->buffers[cpu];
+       /* This is the shift value for the above recusive locking */
+       cpu_buffer->nest += NESTED_BITS;
+}
+
+/**
+ * ring_buffer_nest_end - Allow to trace while nested
+ * @buffer: The ring buffer to modify
+ *
+ * Must be called after ring_buffer_nest_start() and after the
+ * ring_buffer_unlock_commit().
+ */
+void ring_buffer_nest_end(struct ring_buffer *buffer)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+       int cpu;
+
+       /* disabled by ring_buffer_nest_start() */
+       cpu = raw_smp_processor_id();
+       cpu_buffer = buffer->buffers[cpu];
+       /* This is the shift value for the above recusive locking */
+       cpu_buffer->nest -= NESTED_BITS;
+       preempt_enable_notrace();
 }
 
 /**
@@ -2637,7 +2764,8 @@ rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
                  sched_clock_stable() ? "" :
                  "If you just came from a suspend/resume,\n"
                  "please switch to the trace global clock:\n"
-                 "  echo global > /sys/kernel/debug/tracing/trace_clock\n");
+                 "  echo global > /sys/kernel/debug/tracing/trace_clock\n"
+                 "or add trace_clock=global to the kernel command line\n");
        info->add_timestamp = 1;
 }
 
@@ -2669,7 +2797,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
         * If this is the first commit on the page, then it has the same
         * timestamp as the page itself.
         */
-       if (!tail)
+       if (!tail && !ring_buffer_time_stamp_abs(cpu_buffer->buffer))
                info->delta = 0;
 
        /* See if we shot pass the end of this buffer page */
@@ -2746,8 +2874,11 @@ rb_reserve_next_event(struct ring_buffer *buffer,
        /* make sure this diff is calculated here */
        barrier();
 
-       /* Did the write stamp get updated already? */
-       if (likely(info.ts >= cpu_buffer->write_stamp)) {
+       if (ring_buffer_time_stamp_abs(buffer)) {
+               info.delta = info.ts;
+               rb_handle_timestamp(cpu_buffer, &info);
+       } else /* Did the write stamp get updated already? */
+               if (likely(info.ts >= cpu_buffer->write_stamp)) {
                info.delta = diff;
                if (unlikely(test_time_stamp(info.delta)))
                        rb_handle_timestamp(cpu_buffer, &info);
@@ -3429,14 +3560,13 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
                return;
 
        case RINGBUF_TYPE_TIME_EXTEND:
-               delta = event->array[0];
-               delta <<= TS_SHIFT;
-               delta += event->time_delta;
+               delta = ring_buffer_event_time_stamp(event);
                cpu_buffer->read_stamp += delta;
                return;
 
        case RINGBUF_TYPE_TIME_STAMP:
-               /* FIXME: not implemented */
+               delta = ring_buffer_event_time_stamp(event);
+               cpu_buffer->read_stamp = delta;
                return;
 
        case RINGBUF_TYPE_DATA:
@@ -3460,14 +3590,13 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
                return;
 
        case RINGBUF_TYPE_TIME_EXTEND:
-               delta = event->array[0];
-               delta <<= TS_SHIFT;
-               delta += event->time_delta;
+               delta = ring_buffer_event_time_stamp(event);
                iter->read_stamp += delta;
                return;
 
        case RINGBUF_TYPE_TIME_STAMP:
-               /* FIXME: not implemented */
+               delta = ring_buffer_event_time_stamp(event);
+               iter->read_stamp = delta;
                return;
 
        case RINGBUF_TYPE_DATA:
@@ -3691,6 +3820,8 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
        struct buffer_page *reader;
        int nr_loops = 0;
 
+       if (ts)
+               *ts = 0;
  again:
        /*
         * We repeat when a time extend is encountered.
@@ -3727,12 +3858,17 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
                goto again;
 
        case RINGBUF_TYPE_TIME_STAMP:
-               /* FIXME: not implemented */
+               if (ts) {
+                       *ts = ring_buffer_event_time_stamp(event);
+                       ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
+                                                        cpu_buffer->cpu, ts);
+               }
+               /* Internal data, OK to advance */
                rb_advance_reader(cpu_buffer);
                goto again;
 
        case RINGBUF_TYPE_DATA:
-               if (ts) {
+               if (ts && !(*ts)) {
                        *ts = cpu_buffer->read_stamp + event->time_delta;
                        ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
                                                         cpu_buffer->cpu, ts);
@@ -3757,6 +3893,9 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
        struct ring_buffer_event *event;
        int nr_loops = 0;
 
+       if (ts)
+               *ts = 0;
+
        cpu_buffer = iter->cpu_buffer;
        buffer = cpu_buffer->buffer;
 
@@ -3809,12 +3948,17 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
                goto again;
 
        case RINGBUF_TYPE_TIME_STAMP:
-               /* FIXME: not implemented */
+               if (ts) {
+                       *ts = ring_buffer_event_time_stamp(event);
+                       ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
+                                                        cpu_buffer->cpu, ts);
+               }
+               /* Internal data, OK to advance */
                rb_advance_iter(iter);
                goto again;
 
        case RINGBUF_TYPE_DATA:
-               if (ts) {
+               if (ts && !(*ts)) {
                        *ts = iter->read_stamp + event->time_delta;
                        ring_buffer_normalize_time_stamp(buffer,
                                                         cpu_buffer->cpu, ts);
index 5071931eb94352672412967128a48e3fab4049e5..dfbcf9ee1447645594b06736e2ca9616e6cd93eb 100644 (file)
@@ -41,6 +41,7 @@
 #include <linux/nmi.h>
 #include <linux/fs.h>
 #include <linux/trace.h>
+#include <linux/sched/clock.h>
 #include <linux/sched/rt.h>
 
 #include "trace.h"
@@ -1168,6 +1169,14 @@ static struct {
        ARCH_TRACE_CLOCKS
 };
 
+bool trace_clock_in_ns(struct trace_array *tr)
+{
+       if (trace_clocks[tr->clock_id].in_ns)
+               return true;
+
+       return false;
+}
+
 /*
  * trace_parser_get_init - gets the buffer for trace parser
  */
@@ -2269,7 +2278,7 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
 
        *current_rb = trace_file->tr->trace_buffer.buffer;
 
-       if ((trace_file->flags &
+       if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
             (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
            (entry = this_cpu_read(trace_buffered_event))) {
                /* Try to use the per cpu buffer first */
@@ -4515,6 +4524,9 @@ static const char readme_msg[] =
 #ifdef CONFIG_X86_64
        "     x86-tsc:   TSC cycle counter\n"
 #endif
+       "\n  timestamp_mode\t-view the mode used to timestamp events\n"
+       "       delta:   Delta difference against a buffer-wide timestamp\n"
+       "    absolute:   Absolute (standalone) timestamp\n"
        "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
        "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
        "  tracing_cpumask\t- Limit which CPUs to trace\n"
@@ -4691,8 +4703,9 @@ static const char readme_msg[] =
        "\t            .sym        display an address as a symbol\n"
        "\t            .sym-offset display an address as a symbol and offset\n"
        "\t            .execname   display a common_pid as a program name\n"
-       "\t            .syscall    display a syscall id as a syscall name\n\n"
-       "\t            .log2       display log2 value rather than raw number\n\n"
+       "\t            .syscall    display a syscall id as a syscall name\n"
+       "\t            .log2       display log2 value rather than raw number\n"
+       "\t            .usecs      display a common_timestamp in microseconds\n\n"
        "\t    The 'pause' parameter can be used to pause an existing hist\n"
        "\t    trigger or to start a hist trigger but not log any events\n"
        "\t    until told to do so.  'continue' can be used to start or\n"
@@ -6202,7 +6215,7 @@ static int tracing_clock_show(struct seq_file *m, void *v)
        return 0;
 }
 
-static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
+int tracing_set_clock(struct trace_array *tr, const char *clockstr)
 {
        int i;
 
@@ -6282,6 +6295,71 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
        return ret;
 }
 
+static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
+{
+       struct trace_array *tr = m->private;
+
+       mutex_lock(&trace_types_lock);
+
+       if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
+               seq_puts(m, "delta [absolute]\n");
+       else
+               seq_puts(m, "[delta] absolute\n");
+
+       mutex_unlock(&trace_types_lock);
+
+       return 0;
+}
+
+static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
+{
+       struct trace_array *tr = inode->i_private;
+       int ret;
+
+       if (tracing_disabled)
+               return -ENODEV;
+
+       if (trace_array_get(tr))
+               return -ENODEV;
+
+       ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
+       if (ret < 0)
+               trace_array_put(tr);
+
+       return ret;
+}
+
+int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
+{
+       int ret = 0;
+
+       mutex_lock(&trace_types_lock);
+
+       if (abs && tr->time_stamp_abs_ref++)
+               goto out;
+
+       if (!abs) {
+               if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               if (--tr->time_stamp_abs_ref)
+                       goto out;
+       }
+
+       ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+       if (tr->max_buffer.buffer)
+               ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
+#endif
+ out:
+       mutex_unlock(&trace_types_lock);
+
+       return ret;
+}
+
 struct ftrace_buffer_info {
        struct trace_iterator   iter;
        void                    *spare;
@@ -6529,6 +6607,13 @@ static const struct file_operations trace_clock_fops = {
        .write          = tracing_clock_write,
 };
 
+static const struct file_operations trace_time_stamp_mode_fops = {
+       .open           = tracing_time_stamp_mode_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = tracing_single_release_tr,
+};
+
 #ifdef CONFIG_TRACER_SNAPSHOT
 static const struct file_operations snapshot_fops = {
        .open           = tracing_snapshot_open,
@@ -7699,6 +7784,7 @@ static int instance_mkdir(const char *name)
 
        INIT_LIST_HEAD(&tr->systems);
        INIT_LIST_HEAD(&tr->events);
+       INIT_LIST_HEAD(&tr->hist_vars);
 
        if (allocate_trace_buffers(tr, trace_buf_size) < 0)
                goto out_free_tr;
@@ -7851,6 +7937,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
        trace_create_file("tracing_on", 0644, d_tracer,
                          tr, &rb_simple_fops);
 
+       trace_create_file("timestamp_mode", 0444, d_tracer, tr,
+                         &trace_time_stamp_mode_fops);
+
        create_trace_options_dir(tr);
 
 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
@@ -8446,6 +8535,7 @@ __init static int tracer_alloc_buffers(void)
 
        INIT_LIST_HEAD(&global_trace.systems);
        INIT_LIST_HEAD(&global_trace.events);
+       INIT_LIST_HEAD(&global_trace.hist_vars);
        list_add(&global_trace.list, &ftrace_trace_arrays);
 
        apply_trace_boot_options();
@@ -8507,3 +8597,21 @@ __init static int clear_boot_tracer(void)
 
 fs_initcall(tracer_init_tracefs);
 late_initcall_sync(clear_boot_tracer);
+
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+__init static int tracing_set_default_clock(void)
+{
+       /* sched_clock_stable() is determined in late_initcall */
+       if (!trace_boot_clock && !sched_clock_stable()) {
+               printk(KERN_WARNING
+                      "Unstable clock detected, switching default tracing clock to \"global\"\n"
+                      "If you want to keep using the local clock, then add:\n"
+                      "  \"trace_clock=local\"\n"
+                      "on the kernel command line\n");
+               tracing_set_clock(&global_trace, "global");
+       }
+
+       return 0;
+}
+late_initcall_sync(tracing_set_default_clock);
+#endif
index 2a6d0325a76181a0a8b309eaa2b69f91b350d98a..6fb46a06c9dc0deef8560b57eaedf1aaf88cb0a2 100644 (file)
@@ -273,6 +273,8 @@ struct trace_array {
        /* function tracing enabled */
        int                     function_enabled;
 #endif
+       int                     time_stamp_abs_ref;
+       struct list_head        hist_vars;
 };
 
 enum {
@@ -286,6 +288,11 @@ extern struct mutex trace_types_lock;
 extern int trace_array_get(struct trace_array *tr);
 extern void trace_array_put(struct trace_array *tr);
 
+extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs);
+extern int tracing_set_clock(struct trace_array *tr, const char *clockstr);
+
+extern bool trace_clock_in_ns(struct trace_array *tr);
+
 /*
  * The global tracer (top) should be the first trace array added,
  * but we check the flag anyway.
@@ -1209,12 +1216,11 @@ struct ftrace_event_field {
        int                     is_signed;
 };
 
+struct prog_entry;
+
 struct event_filter {
-       int                     n_preds;        /* Number assigned */
-       int                     a_preds;        /* allocated */
-       struct filter_pred __rcu        *preds;
-       struct filter_pred __rcu        *root;
-       char                            *filter_string;
+       struct prog_entry __rcu *prog;
+       char                    *filter_string;
 };
 
 struct event_subsystem {
@@ -1291,7 +1297,7 @@ __event_trigger_test_discard(struct trace_event_file *file,
        unsigned long eflags = file->flags;
 
        if (eflags & EVENT_FILE_FL_TRIGGER_COND)
-               *tt = event_triggers_call(file, entry);
+               *tt = event_triggers_call(file, entry, event);
 
        if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
            (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
@@ -1328,7 +1334,7 @@ event_trigger_unlock_commit(struct trace_event_file *file,
                trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc);
 
        if (tt)
-               event_triggers_post_call(file, tt, entry);
+               event_triggers_post_call(file, tt, entry, event);
 }
 
 /**
@@ -1361,7 +1367,7 @@ event_trigger_unlock_commit_regs(struct trace_event_file *file,
                                                irq_flags, pc, regs);
 
        if (tt)
-               event_triggers_post_call(file, tt, entry);
+               event_triggers_post_call(file, tt, entry, event);
 }
 
 #define FILTER_PRED_INVALID    ((unsigned short)-1)
@@ -1406,12 +1412,8 @@ struct filter_pred {
        unsigned short          *ops;
        struct ftrace_event_field *field;
        int                     offset;
-       int                     not;
+       int                     not;
        int                     op;
-       unsigned short          index;
-       unsigned short          parent;
-       unsigned short          left;
-       unsigned short          right;
 };
 
 static inline bool is_string_field(struct ftrace_event_field *field)
@@ -1543,6 +1545,8 @@ extern void pause_named_trigger(struct event_trigger_data *data);
 extern void unpause_named_trigger(struct event_trigger_data *data);
 extern void set_named_trigger_data(struct event_trigger_data *data,
                                   struct event_trigger_data *named_data);
+extern struct event_trigger_data *
+get_named_trigger_data(struct event_trigger_data *data);
 extern int register_event_command(struct event_command *cmd);
 extern int unregister_event_command(struct event_command *cmd);
 extern int register_trigger_hist_enable_disable_cmds(void);
@@ -1586,7 +1590,8 @@ extern int register_trigger_hist_enable_disable_cmds(void);
  */
 struct event_trigger_ops {
        void                    (*func)(struct event_trigger_data *data,
-                                       void *rec);
+                                       void *rec,
+                                       struct ring_buffer_event *rbe);
        int                     (*init)(struct event_trigger_ops *ops,
                                        struct event_trigger_data *data);
        void                    (*free)(struct event_trigger_ops *ops,
index 5fdc779f411d83f5f47d5f501b0c0b8cc40facca..d8a188e0418aa9ad31798d9fb5cb3320a409d1b2 100644 (file)
@@ -96,7 +96,7 @@ u64 notrace trace_clock_global(void)
        int this_cpu;
        u64 now;
 
-       local_irq_save(flags);
+       raw_local_irq_save(flags);
 
        this_cpu = raw_smp_processor_id();
        now = sched_clock_cpu(this_cpu);
@@ -122,7 +122,7 @@ u64 notrace trace_clock_global(void)
        arch_spin_unlock(&trace_clock_struct.lock);
 
  out:
-       local_irq_restore(flags);
+       raw_local_irq_restore(flags);
 
        return now;
 }
index a764aec3c9a17a386112b56326f188881babebe6..1bda4ec95e1819d9fade7de8684f16dc0425dde5 100644 (file)
        "# Only events with the given fields will be affected.\n"       \
        "# If no events are modified, an error message will be displayed here"
 
-enum filter_op_ids
-{
-       OP_OR,
-       OP_AND,
-       OP_GLOB,
-       OP_NE,
-       OP_EQ,
-       OP_LT,
-       OP_LE,
-       OP_GT,
-       OP_GE,
-       OP_BAND,
-       OP_NOT,
-       OP_NONE,
-       OP_OPEN_PAREN,
-};
+/* Due to token parsing '<=' must be before '<' and '>=' must be before '>' */
+#define OPS                                    \
+       C( OP_GLOB,     "~"  ),                 \
+       C( OP_NE,       "!=" ),                 \
+       C( OP_EQ,       "==" ),                 \
+       C( OP_LE,       "<=" ),                 \
+       C( OP_LT,       "<"  ),                 \
+       C( OP_GE,       ">=" ),                 \
+       C( OP_GT,       ">"  ),                 \
+       C( OP_BAND,     "&"  ),                 \
+       C( OP_MAX,      NULL )
 
-struct filter_op {
-       int id;
-       char *string;
-       int precedence;
-};
+#undef C
+#define C(a, b)        a
 
-/* Order must be the same as enum filter_op_ids above */
-static struct filter_op filter_ops[] = {
-       { OP_OR,        "||",           1 },
-       { OP_AND,       "&&",           2 },
-       { OP_GLOB,      "~",            4 },
-       { OP_NE,        "!=",           4 },
-       { OP_EQ,        "==",           4 },
-       { OP_LT,        "<",            5 },
-       { OP_LE,        "<=",           5 },
-       { OP_GT,        ">",            5 },
-       { OP_GE,        ">=",           5 },
-       { OP_BAND,      "&",            6 },
-       { OP_NOT,       "!",            6 },
-       { OP_NONE,      "OP_NONE",      0 },
-       { OP_OPEN_PAREN, "(",           0 },
-};
+enum filter_op_ids { OPS };
 
-enum {
-       FILT_ERR_NONE,
-       FILT_ERR_INVALID_OP,
-       FILT_ERR_UNBALANCED_PAREN,
-       FILT_ERR_TOO_MANY_OPERANDS,
-       FILT_ERR_OPERAND_TOO_LONG,
-       FILT_ERR_FIELD_NOT_FOUND,
-       FILT_ERR_ILLEGAL_FIELD_OP,
-       FILT_ERR_ILLEGAL_INTVAL,
-       FILT_ERR_BAD_SUBSYS_FILTER,
-       FILT_ERR_TOO_MANY_PREDS,
-       FILT_ERR_MISSING_FIELD,
-       FILT_ERR_INVALID_FILTER,
-       FILT_ERR_IP_FIELD_ONLY,
-       FILT_ERR_ILLEGAL_NOT_OP,
-};
+#undef C
+#define C(a, b)        b
 
-static char *err_text[] = {
-       "No error",
-       "Invalid operator",
-       "Unbalanced parens",
-       "Too many operands",
-       "Operand too long",
-       "Field not found",
-       "Illegal operation for field type",
-       "Illegal integer value",
-       "Couldn't find or set field in one of a subsystem's events",
-       "Too many terms in predicate expression",
-       "Missing field name and/or value",
-       "Meaningless filter expression",
-       "Only 'ip' field is supported for function trace",
-       "Illegal use of '!'",
-};
+static const char * ops[] = { OPS };
 
-struct opstack_op {
-       enum filter_op_ids op;
-       struct list_head list;
-};
+/*
+ * pred functions are OP_LE, OP_LT, OP_GE, OP_GT, and OP_BAND
+ * pred_funcs_##type below must match the order of them above.
+ */
+#define PRED_FUNC_START                        OP_LE
+#define PRED_FUNC_MAX                  (OP_BAND - PRED_FUNC_START)
+
+#define ERRORS                                                         \
+       C(NONE,                 "No error"),                            \
+       C(INVALID_OP,           "Invalid operator"),                    \
+       C(TOO_MANY_OPEN,        "Too many '('"),                        \
+       C(TOO_MANY_CLOSE,       "Too few '('"),                         \
+       C(MISSING_QUOTE,        "Missing matching quote"),              \
+       C(OPERAND_TOO_LONG,     "Operand too long"),                    \
+       C(EXPECT_STRING,        "Expecting string field"),              \
+       C(EXPECT_DIGIT,         "Expecting numeric field"),             \
+       C(ILLEGAL_FIELD_OP,     "Illegal operation for field type"),    \
+       C(FIELD_NOT_FOUND,      "Field not found"),                     \
+       C(ILLEGAL_INTVAL,       "Illegal integer value"),               \
+       C(BAD_SUBSYS_FILTER,    "Couldn't find or set field in one of a subsystem's events"), \
+       C(TOO_MANY_PREDS,       "Too many terms in predicate expression"), \
+       C(INVALID_FILTER,       "Meaningless filter expression"),       \
+       C(IP_FIELD_ONLY,        "Only 'ip' field is supported for function trace"), \
+       C(INVALID_VALUE,        "Invalid value (did you forget quotes)?"),
+
+#undef C
+#define C(a, b)                FILT_ERR_##a
+
+enum { ERRORS };
+
+#undef C
+#define C(a, b)                b
+
+static char *err_text[] = { ERRORS };
+
+/* Called after a '!' character but "!=" and "!~" are not "not"s */
+static bool is_not(const char *str)
+{
+       switch (str[1]) {
+       case '=':
+       case '~':
+               return false;
+       }
+       return true;
+}
 
-struct postfix_elt {
-       enum filter_op_ids op;
-       char *operand;
-       struct list_head list;
+/**
+ * prog_entry - a single entry in the filter program
+ * @target:         Index to jump to on a branch (actually one minus the index)
+ * @when_to_branch:  The value of the result of the predicate to do a branch
+ * @pred:           The predicate to execute.
+ */
+struct prog_entry {
+       int                     target;
+       int                     when_to_branch;
+       struct filter_pred      *pred;
 };
 
-struct filter_parse_state {
-       struct filter_op *ops;
-       struct list_head opstack;
-       struct list_head postfix;
+/**
+ * update_preds- assign a program entry a label target
+ * @prog: The program array
+ * @N: The index of the current entry in @prog
+ * @when_to_branch: What to assign a program entry for its branch condition
+ *
+ * The program entry at @N has a target that points to the index of a program
+ * entry that can have its target and when_to_branch fields updated.
+ * Update the current program entry denoted by index @N target field to be
+ * that of the updated entry. This will denote the entry to update if
+ * we are processing an "||" after an "&&"
+ */
+static void update_preds(struct prog_entry *prog, int N, int invert)
+{
+       int t, s;
+
+       t = prog[N].target;
+       s = prog[t].target;
+       prog[t].when_to_branch = invert;
+       prog[t].target = N;
+       prog[N].target = s;
+}
+
+struct filter_parse_error {
        int lasterr;
        int lasterr_pos;
-
-       struct {
-               char *string;
-               unsigned int cnt;
-               unsigned int tail;
-       } infix;
-
-       struct {
-               char string[MAX_FILTER_STR_VAL];
-               int pos;
-               unsigned int tail;
-       } operand;
 };
 
-struct pred_stack {
-       struct filter_pred      **preds;
-       int                     index;
+static void parse_error(struct filter_parse_error *pe, int err, int pos)
+{
+       pe->lasterr = err;
+       pe->lasterr_pos = pos;
+}
+
+typedef int (*parse_pred_fn)(const char *str, void *data, int pos,
+                            struct filter_parse_error *pe,
+                            struct filter_pred **pred);
+
+enum {
+       INVERT          = 1,
+       PROCESS_AND     = 2,
+       PROCESS_OR      = 4,
 };
 
-/* If not of not match is equal to not of not, then it is a match */
+/*
+ * Without going into a formal proof, this explains the method that is used in
+ * parsing the logical expressions.
+ *
+ * For example, if we have: "a && !(!b || (c && g)) || d || e && !f"
+ * The first pass will convert it into the following program:
+ *
+ * n1: r=a;       l1: if (!r) goto l4;
+ * n2: r=b;       l2: if (!r) goto l4;
+ * n3: r=c; r=!r; l3: if (r) goto l4;
+ * n4: r=g; r=!r; l4: if (r) goto l5;
+ * n5: r=d;       l5: if (r) goto T
+ * n6: r=e;       l6: if (!r) goto l7;
+ * n7: r=f; r=!r; l7: if (!r) goto F
+ * T: return TRUE
+ * F: return FALSE
+ *
+ * To do this, we use a data structure to represent each of the above
+ * predicate and conditions that has:
+ *
+ *  predicate, when_to_branch, invert, target
+ *
+ * The "predicate" will hold the function to determine the result "r".
+ * The "when_to_branch" denotes what "r" should be if a branch is to be taken
+ * "&&" would contain "!r" or (0) and "||" would contain "r" or (1).
+ * The "invert" holds whether the value should be reversed before testing.
+ * The "target" contains the label "l#" to jump to.
+ *
+ * A stack is created to hold values when parentheses are used.
+ *
+ * To simplify the logic, the labels will start at 0 and not 1.
+ *
+ * The possible invert values are 1 and 0. The number of "!"s that are in scope
+ * before the predicate determines the invert value, if the number is odd then
+ * the invert value is 1 and 0 otherwise. This means the invert value only
+ * needs to be toggled when a new "!" is introduced compared to what is stored
+ * on the stack, where parentheses were used.
+ *
+ * The top of the stack and "invert" are initialized to zero.
+ *
+ * ** FIRST PASS **
+ *
+ * #1 A loop through all the tokens is done:
+ *
+ * #2 If the token is an "(", the stack is push, and the current stack value
+ *    gets the current invert value, and the loop continues to the next token.
+ *    The top of the stack saves the "invert" value to keep track of what
+ *    the current inversion is. As "!(a && !b || c)" would require all
+ *    predicates being affected separately by the "!" before the parentheses.
+ *    And that would end up being equivalent to "(!a || b) && !c"
+ *
+ * #3 If the token is an "!", the current "invert" value gets inverted, and
+ *    the loop continues. Note, if the next token is a predicate, then
+ *    this "invert" value is only valid for the current program entry,
+ *    and does not affect other predicates later on.
+ *
+ * The only other acceptable token is the predicate string.
+ *
+ * #4 A new entry into the program is added saving: the predicate and the
+ *    current value of "invert". The target is currently assigned to the
+ *    previous program index (this will not be its final value).
+ *
+ * #5 We now enter another loop and look at the next token. The only valid
+ *    tokens are ")", "&&", "||" or end of the input string "\0".
+ *
+ * #6 The invert variable is reset to the current value saved on the top of
+ *    the stack.
+ *
+ * #7 The top of the stack holds not only the current invert value, but also
+ *    if a "&&" or "||" needs to be processed. Note, the "&&" takes higher
+ *    precedence than "||". That is "a && b || c && d" is equivalent to
+ *    "(a && b) || (c && d)". Thus the first thing to do is to see if "&&" needs
+ *    to be processed. This is the case if an "&&" was the last token. If it was
+ *    then we call update_preds(). This takes the program, the current index in
+ *    the program, and the current value of "invert".  More will be described
+ *    below about this function.
+ *
+ * #8 If the next token is "&&" then we set a flag in the top of the stack
+ *    that denotes that "&&" needs to be processed, break out of this loop
+ *    and continue with the outer loop.
+ *
+ * #9 Otherwise, if a "||" needs to be processed then update_preds() is called.
+ *    This is called with the program, the current index in the program, but
+ *    this time with an inverted value of "invert" (that is !invert). This is
+ *    because the value taken will become the "when_to_branch" value of the
+ *    program.
+ *    Note, this is called when the next token is not an "&&". As stated before,
+ *    "&&" takes higher precedence, and "||" should not be processed yet if the
+ *    next logical operation is "&&".
+ *
+ * #10 If the next token is "||" then we set a flag in the top of the stack
+ *     that denotes that "||" needs to be processed, break out of this loop
+ *     and continue with the outer loop.
+ *
+ * #11 If this is the end of the input string "\0" then we break out of both
+ *     loops.
+ *
+ * #12 Otherwise, the next token is ")", where we pop the stack and continue
+ *     this inner loop.
+ *
+ * Now to discuss the update_pred() function, as that is key to the setting up
+ * of the program. Remember the "target" of the program is initialized to the
+ * previous index and not the "l" label. The target holds the index into the
+ * program that gets affected by the operand. Thus if we have something like
+ *  "a || b && c", when we process "a" the target will be "-1" (undefined).
+ * When we process "b", its target is "0", which is the index of "a", as that's
+ * the predicate that is affected by "||". But because the next token after "b"
+ * is "&&" we don't call update_preds(). Instead continue to "c". As the
+ * next token after "c" is not "&&" but the end of input, we first process the
+ * "&&" by calling update_preds() for the "&&" then we process the "||" by
+ * calling update_preds() with the values for processing "||".
+ *
+ * What does that mean? What update_preds() does is to first save the "target"
+ * of the program entry indexed by the current program entry's "target"
+ * (remember the "target" is initialized to previous program entry), and then
+ * sets that "target" to the current index which represents the label "l#".
+ * That entry's "when_to_branch" is set to the value passed in (the "invert"
+ * or "!invert"). Then it sets the current program entry's target to the saved
+ * "target" value (the old value of the program that had its "target" updated
+ * to the label).
+ *
+ * Looking back at "a || b && c", we have the following steps:
+ *  "a"  - prog[0] = { "a", X, -1 } // pred, when_to_branch, target
+ *  "||" - flag that we need to process "||"; continue outer loop
+ *  "b"  - prog[1] = { "b", X, 0 }
+ *  "&&" - flag that we need to process "&&"; continue outer loop
+ * (Notice we did not process "||")
+ *  "c"  - prog[2] = { "c", X, 1 }
+ *  update_preds(prog, 2, 0); // invert = 0 as we are processing "&&"
+ *    t = prog[2].target; // t = 1
+ *    s = prog[t].target; // s = 0
+ *    prog[t].target = 2; // Set target to "l2"
+ *    prog[t].when_to_branch = 0;
+ *    prog[2].target = s;
+ * update_preds(prog, 2, 1); // invert = 1 as we are now processing "||"
+ *    t = prog[2].target; // t = 0
+ *    s = prog[t].target; // s = -1
+ *    prog[t].target = 2; // Set target to "l2"
+ *    prog[t].when_to_branch = 1;
+ *    prog[2].target = s;
+ *
+ * #13 Which brings us to the final step of the first pass, which is to set
+ *     the last program entry's when_to_branch and target, which will be
+ *     when_to_branch = 0; target = N; ( the label after the program entry after
+ *     the last program entry processed above).
+ *
+ * If we denote "TRUE" to be the entry after the last program entry processed,
+ * and "FALSE" the program entry after that, we are now done with the first
+ * pass.
+ *
+ * Making the above "a || b && c" have a program of:
+ *  prog[0] = { "a", 1, 2 }
+ *  prog[1] = { "b", 0, 2 }
+ *  prog[2] = { "c", 0, 3 }
+ *
+ * Which translates into:
+ * n0: r = a; l0: if (r) goto l2;
+ * n1: r = b; l1: if (!r) goto l2;
+ * n2: r = c; l2: if (!r) goto l3;  // Which is the same as "goto F;"
+ * T: return TRUE; l3:
+ * F: return FALSE
+ *
+ * Although, after the first pass, the program is correct, it is
+ * inefficient. The simple sample of "a || b && c" could easily have been
+ * converted into:
+ * n0: r = a; if (r) goto T
+ * n1: r = b; if (!r) goto F
+ * n2: r = c; if (!r) goto F
+ * T: return TRUE;
+ * F: return FALSE;
+ *
+ * The First Pass is over the input string. The next two passes are over
+ * the program itself.
+ *
+ * ** SECOND PASS **
+ *
+ * Which brings us to the second pass. If a jump to a label has the
+ * same condition as that label, it can instead jump to its target.
+ * The original example of "a && !(!b || (c && g)) || d || e && !f"
+ * where the first pass gives us:
+ *
+ * n1: r=a;       l1: if (!r) goto l4;
+ * n2: r=b;       l2: if (!r) goto l4;
+ * n3: r=c; r=!r; l3: if (r) goto l4;
+ * n4: r=g; r=!r; l4: if (r) goto l5;
+ * n5: r=d;       l5: if (r) goto T
+ * n6: r=e;       l6: if (!r) goto l7;
+ * n7: r=f; r=!r; l7: if (!r) goto F:
+ * T: return TRUE;
+ * F: return FALSE
+ *
+ * We can see that "l3: if (r) goto l4;" and at l4, we have "if (r) goto l5;".
+ * And "l5: if (r) goto T", we could optimize this by converting l3 and l4
+ * to go directly to T. To accomplish this, we start from the last
+ * entry in the program and work our way back. If the target of the entry
+ * has the same "when_to_branch" then we could use that entry's target.
+ * Doing this, the above would end up as:
+ *
+ * n1: r=a;       l1: if (!r) goto l4;
+ * n2: r=b;       l2: if (!r) goto l4;
+ * n3: r=c; r=!r; l3: if (r) goto T;
+ * n4: r=g; r=!r; l4: if (r) goto T;
+ * n5: r=d;       l5: if (r) goto T;
+ * n6: r=e;       l6: if (!r) goto F;
+ * n7: r=f; r=!r; l7: if (!r) goto F;
+ * T: return TRUE
+ * F: return FALSE
+ *
+ * In that same pass, if the "when_to_branch" doesn't match, we can simply
+ * go to the program entry after the label. That is, "l2: if (!r) goto l4;"
+ * where "l4: if (r) goto T;", then we can convert l2 to be:
+ * "l2: if (!r) goto n5;".
+ *
+ * This will have the second pass give us:
+ * n1: r=a;       l1: if (!r) goto n5;
+ * n2: r=b;       l2: if (!r) goto n5;
+ * n3: r=c; r=!r; l3: if (r) goto T;
+ * n4: r=g; r=!r; l4: if (r) goto T;
+ * n5: r=d;       l5: if (r) goto T
+ * n6: r=e;       l6: if (!r) goto F;
+ * n7: r=f; r=!r; l7: if (!r) goto F
+ * T: return TRUE
+ * F: return FALSE
+ *
+ * Notice, all the "l#" labels are no longer used, and they can now
+ * be discarded.
+ *
+ * ** THIRD PASS **
+ *
+ * For the third pass we deal with the inverts. As they simply just
+ * make the "when_to_branch" get inverted, a simple loop over the
+ * program that does: "when_to_branch ^= invert;" will do the
+ * job, leaving us with:
+ * n1: r=a; if (!r) goto n5;
+ * n2: r=b; if (!r) goto n5;
+ * n3: r=c; if (!r) goto T;
+ * n4: r=g; if (!r) goto T;
+ * n5: r=d; if (r) goto T
+ * n6: r=e; if (!r) goto F;
+ * n7: r=f; if (r) goto F
+ * T: return TRUE
+ * F: return FALSE
+ *
+ * As "r = a; if (!r) goto n5;" is obviously the same as
+ * "if (!a) goto n5;" without doing anything we can interpret the
+ * program as:
+ * n1: if (!a) goto n5;
+ * n2: if (!b) goto n5;
+ * n3: if (!c) goto T;
+ * n4: if (!g) goto T;
+ * n5: if (d) goto T
+ * n6: if (!e) goto F;
+ * n7: if (f) goto F
+ * T: return TRUE
+ * F: return FALSE
+ *
+ * Since the inverts are discarded at the end, there's no reason to store
+ * them in the program array (and waste memory). A separate array to hold
+ * the inverts is used and freed at the end.
+ */
+/*
+ * predicate_parse - turn a filter string into a branch program
+ * @str:        the filter string to parse
+ * @nr_parens:  maximum nesting depth of '(' allowed (sizes op_stack)
+ * @nr_preds:   maximum number of predicates allowed (sizes the program)
+ * @parse_pred: callback that parses one predicate from the string
+ * @data:       opaque cookie passed through to @parse_pred
+ * @pe:         error state to fill in on a parse failure
+ *
+ * Implements the three passes described above. Returns the allocated
+ * program array on success, or an ERR_PTR() on failure. On failure all
+ * intermediate allocations, including any preds already created by
+ * @parse_pred, are freed here.
+ */
+static struct prog_entry *
+predicate_parse(const char *str, int nr_parens, int nr_preds,
+               parse_pred_fn parse_pred, void *data,
+               struct filter_parse_error *pe)
+{
+       struct prog_entry *prog_stack;
+       struct prog_entry *prog;
+       const char *ptr = str;
+       char *inverts = NULL;
+       int *op_stack;
+       int *top;
+       int invert = 0;
+       int ret = -ENOMEM;
+       int len;
+       int N = 0;
+       int i;
+
+       nr_preds += 2; /* For TRUE and FALSE */
+
+       op_stack = kmalloc(sizeof(*op_stack) * nr_parens, GFP_KERNEL);
+       if (!op_stack)
+               return ERR_PTR(-ENOMEM);
+       /*
+        * kcalloc() so every pred pointer starts out NULL; the error path
+        * below walks the array freeing preds and stops at the first NULL.
+        */
+       prog_stack = kcalloc(nr_preds, sizeof(*prog_stack), GFP_KERNEL);
+       if (!prog_stack) {
+               parse_error(pe, -ENOMEM, 0);
+               goto out_free;
+       }
+       inverts = kmalloc(sizeof(*inverts) * nr_preds, GFP_KERNEL);
+       if (!inverts) {
+               parse_error(pe, -ENOMEM, 0);
+               goto out_free;
+       }
+
+       top = op_stack;
+       prog = prog_stack;
+       *top = 0;
+
+       /* First pass */
+       while (*ptr) {                                          /* #1 */
+               const char *next = ptr++;
+
+               if (isspace(*next))
+                       continue;
+
+               switch (*next) {
+               case '(':                                       /* #2 */
+                       if (top - op_stack > nr_parens) {
+                               /* Too deep; free everything, not a bare return */
+                               ret = -EINVAL;
+                               goto out_free;
+                       }
+                       *(++top) = invert;
+                       continue;
+               case '!':                                       /* #3 */
+                       if (!is_not(next))
+                               break;
+                       invert = !invert;
+                       continue;
+               }
+
+               if (N >= nr_preds) {
+                       parse_error(pe, FILT_ERR_TOO_MANY_PREDS, next - str);
+                       goto out_free;
+               }
+
+               inverts[N] = invert;                            /* #4 */
+               prog[N].target = N-1;
+
+               len = parse_pred(next, data, ptr - str, pe, &prog[N].pred);
+               if (len < 0) {
+                       ret = len;
+                       goto out_free;
+               }
+               ptr = next + len;
+
+               N++;
+
+               ret = -1;
+               while (1) {                                     /* #5 */
+                       next = ptr++;
+                       if (isspace(*next))
+                               continue;
+
+                       switch (*next) {
+                       case ')':
+                       case '\0':
+                               break;
+                       case '&':
+                       case '|':
+                               /* A double "&&" or "||" is an operator */
+                               if (next[1] == next[0]) {
+                                       ptr++;
+                                       break;
+                               }
+                               /* fall through - a single '&' or '|' is an error */
+                       default:
+                               parse_error(pe, FILT_ERR_TOO_MANY_PREDS,
+                                           next - str);
+                               goto out_free;
+                       }
+
+                       invert = *top & INVERT;
+
+                       if (*top & PROCESS_AND) {               /* #7 */
+                               update_preds(prog, N - 1, invert);
+                               *top &= ~PROCESS_AND;
+                       }
+                       if (*next == '&') {                     /* #8 */
+                               *top |= PROCESS_AND;
+                               break;
+                       }
+                       if (*top & PROCESS_OR) {                /* #9 */
+                               update_preds(prog, N - 1, !invert);
+                               *top &= ~PROCESS_OR;
+                       }
+                       if (*next == '|') {                     /* #10 */
+                               *top |= PROCESS_OR;
+                               break;
+                       }
+                       if (!*next)                             /* #11 */
+                               goto out;
+
+                       if (top == op_stack) {
+                               ret = -1;
+                               /* Too few '(' */
+                               parse_error(pe, FILT_ERR_TOO_MANY_CLOSE, ptr - str);
+                               goto out_free;
+                       }
+                       top--;                                  /* #12 */
+               }
+       }
+ out:
+       if (top != op_stack) {
+               /* Too many '(' */
+               parse_error(pe, FILT_ERR_TOO_MANY_OPEN, ptr - str);
+               goto out_free;
+       }
+
+       prog[N].pred = NULL;                                    /* #13 */
+       prog[N].target = 1;             /* TRUE */
+       prog[N+1].pred = NULL;
+       prog[N+1].target = 0;           /* FALSE */
+       prog[N-1].target = N;
+       prog[N-1].when_to_branch = false;
+
+       /* Second Pass */
+       for (i = N-1 ; i--; ) {
+               int target = prog[i].target;
+               if (prog[i].when_to_branch == prog[target].when_to_branch)
+                       prog[i].target = prog[target].target;
+       }
+
+       /* Third Pass */
+       for (i = 0; i < N; i++) {
+               invert = inverts[i] ^ prog[i].when_to_branch;
+               prog[i].when_to_branch = invert;
+               /* Make sure the program always moves forward */
+               if (WARN_ON(prog[i].target <= i)) {
+                       ret = -EINVAL;
+                       goto out_free;
+               }
+       }
+
+       kfree(inverts);
+       kfree(op_stack);
+       return prog;
+out_free:
+       kfree(op_stack);
+       kfree(inverts);
+       /* Free any preds already created by parse_pred() */
+       if (prog_stack) {
+               for (i = 0; prog_stack[i].pred; i++)
+                       kfree(prog_stack[i].pred);
+               kfree(prog_stack);
+       }
+       return ERR_PTR(ret);
+}
+
 #define DEFINE_COMPARISON_PRED(type)                                   \
 static int filter_pred_LT_##type(struct filter_pred *pred, void *event)        \
 {                                                                      \
        type *addr = (type *)(event + pred->offset);                    \
        type val = (type)pred->val;                                     \
-       int match = (*addr < val);                                      \
-       return !!match == !pred->not;                                   \
+       return *addr < val;                                             \
 }                                                                      \
 static int filter_pred_LE_##type(struct filter_pred *pred, void *event)        \
 {                                                                      \
        type *addr = (type *)(event + pred->offset);                    \
        type val = (type)pred->val;                                     \
-       int match = (*addr <= val);                                     \
-       return !!match == !pred->not;                                   \
+       return *addr <= val;                                            \
 }                                                                      \
 static int filter_pred_GT_##type(struct filter_pred *pred, void *event)        \
 {                                                                      \
        type *addr = (type *)(event + pred->offset);                    \
        type val = (type)pred->val;                                     \
-       int match = (*addr > val);                                      \
-       return !!match == !pred->not;                                   \
+       return *addr > val;                                     \
 }                                                                      \
 static int filter_pred_GE_##type(struct filter_pred *pred, void *event)        \
 {                                                                      \
        type *addr = (type *)(event + pred->offset);                    \
        type val = (type)pred->val;                                     \
-       int match = (*addr >= val);                                     \
-       return !!match == !pred->not;                                   \
+       return *addr >= val;                                            \
 }                                                                      \
 static int filter_pred_BAND_##type(struct filter_pred *pred, void *event) \
 {                                                                      \
        type *addr = (type *)(event + pred->offset);                    \
        type val = (type)pred->val;                                     \
-       int match = !!(*addr & val);                                    \
-       return match == !pred->not;                                     \
+       return !!(*addr & val);                                         \
 }                                                                      \
 static const filter_pred_fn_t pred_funcs_##type[] = {                  \
-       filter_pred_LT_##type,                                          \
        filter_pred_LE_##type,                                          \
-       filter_pred_GT_##type,                                          \
+       filter_pred_LT_##type,                                          \
        filter_pred_GE_##type,                                          \
+       filter_pred_GT_##type,                                          \
        filter_pred_BAND_##type,                                        \
 };
 
-#define PRED_FUNC_START                        OP_LT
-
 #define DEFINE_EQUALITY_PRED(size)                                     \
 static int filter_pred_##size(struct filter_pred *pred, void *event)   \
 {                                                                      \
@@ -272,44 +704,36 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event)
 static int filter_pred_cpu(struct filter_pred *pred, void *event)
 {
        int cpu, cmp;
-       int match = 0;
 
        cpu = raw_smp_processor_id();
        cmp = pred->val;
 
        switch (pred->op) {
        case OP_EQ:
-               match = cpu == cmp;
-               break;
+               return cpu == cmp;
+       case OP_NE:
+               return cpu != cmp;
        case OP_LT:
-               match = cpu < cmp;
-               break;
+               return cpu < cmp;
        case OP_LE:
-               match = cpu <= cmp;
-               break;
+               return cpu <= cmp;
        case OP_GT:
-               match = cpu > cmp;
-               break;
+               return cpu > cmp;
        case OP_GE:
-               match = cpu >= cmp;
-               break;
+               return cpu >= cmp;
        default:
-               break;
+               return 0;
        }
-
-       return !!match == !pred->not;
 }
 
 /* Filter predicate for COMM. */
 static int filter_pred_comm(struct filter_pred *pred, void *event)
 {
-       int cmp, match;
+       int cmp;
 
        cmp = pred->regex.match(current->comm, &pred->regex,
-                               pred->regex.field_len);
-       match = cmp ^ pred->not;
-
-       return match;
+                               TASK_COMM_LEN);
+       return cmp ^ pred->not;
 }
 
 static int filter_pred_none(struct filter_pred *pred, void *event)
@@ -366,6 +790,7 @@ static int regex_match_glob(char *str, struct regex *r, int len __maybe_unused)
                return 1;
        return 0;
 }
+
 /**
  * filter_parse_regex - parse a basic regex
  * @buff:   the raw regex
@@ -426,10 +851,9 @@ static void filter_build_regex(struct filter_pred *pred)
        struct regex *r = &pred->regex;
        char *search;
        enum regex_type type = MATCH_FULL;
-       int not = 0;
 
        if (pred->op == OP_GLOB) {
-               type = filter_parse_regex(r->pattern, r->len, &search, &not);
+               type = filter_parse_regex(r->pattern, r->len, &search, &pred->not);
                r->len = strlen(search);
                memmove(r->pattern, search, r->len+1);
        }
@@ -451,210 +875,32 @@ static void filter_build_regex(struct filter_pred *pred)
                r->match = regex_match_glob;
                break;
        }
-
-       pred->not ^= not;
-}
-
-enum move_type {
-       MOVE_DOWN,
-       MOVE_UP_FROM_LEFT,
-       MOVE_UP_FROM_RIGHT
-};
-
-static struct filter_pred *
-get_pred_parent(struct filter_pred *pred, struct filter_pred *preds,
-               int index, enum move_type *move)
-{
-       if (pred->parent & FILTER_PRED_IS_RIGHT)
-               *move = MOVE_UP_FROM_RIGHT;
-       else
-               *move = MOVE_UP_FROM_LEFT;
-       pred = &preds[pred->parent & ~FILTER_PRED_IS_RIGHT];
-
-       return pred;
-}
-
-enum walk_return {
-       WALK_PRED_ABORT,
-       WALK_PRED_PARENT,
-       WALK_PRED_DEFAULT,
-};
-
-typedef int (*filter_pred_walkcb_t) (enum move_type move,
-                                    struct filter_pred *pred,
-                                    int *err, void *data);
-
-static int walk_pred_tree(struct filter_pred *preds,
-                         struct filter_pred *root,
-                         filter_pred_walkcb_t cb, void *data)
-{
-       struct filter_pred *pred = root;
-       enum move_type move = MOVE_DOWN;
-       int done = 0;
-
-       if  (!preds)
-               return -EINVAL;
-
-       do {
-               int err = 0, ret;
-
-               ret = cb(move, pred, &err, data);
-               if (ret == WALK_PRED_ABORT)
-                       return err;
-               if (ret == WALK_PRED_PARENT)
-                       goto get_parent;
-
-               switch (move) {
-               case MOVE_DOWN:
-                       if (pred->left != FILTER_PRED_INVALID) {
-                               pred = &preds[pred->left];
-                               continue;
-                       }
-                       goto get_parent;
-               case MOVE_UP_FROM_LEFT:
-                       pred = &preds[pred->right];
-                       move = MOVE_DOWN;
-                       continue;
-               case MOVE_UP_FROM_RIGHT:
- get_parent:
-                       if (pred == root)
-                               break;
-                       pred = get_pred_parent(pred, preds,
-                                              pred->parent,
-                                              &move);
-                       continue;
-               }
-               done = 1;
-       } while (!done);
-
-       /* We are fine. */
-       return 0;
-}
-
-/*
- * A series of AND or ORs where found together. Instead of
- * climbing up and down the tree branches, an array of the
- * ops were made in order of checks. We can just move across
- * the array and short circuit if needed.
- */
-static int process_ops(struct filter_pred *preds,
-                      struct filter_pred *op, void *rec)
-{
-       struct filter_pred *pred;
-       int match = 0;
-       int type;
-       int i;
-
-       /*
-        * Micro-optimization: We set type to true if op
-        * is an OR and false otherwise (AND). Then we
-        * just need to test if the match is equal to
-        * the type, and if it is, we can short circuit the
-        * rest of the checks:
-        *
-        * if ((match && op->op == OP_OR) ||
-        *     (!match && op->op == OP_AND))
-        *        return match;
-        */
-       type = op->op == OP_OR;
-
-       for (i = 0; i < op->val; i++) {
-               pred = &preds[op->ops[i]];
-               if (!WARN_ON_ONCE(!pred->fn))
-                       match = pred->fn(pred, rec);
-               if (!!match == type)
-                       break;
-       }
-       /* If not of not match is equal to not of not, then it is a match */
-       return !!match == !op->not;
-}
-
-struct filter_match_preds_data {
-       struct filter_pred *preds;
-       int match;
-       void *rec;
-};
-
-static int filter_match_preds_cb(enum move_type move, struct filter_pred *pred,
-                                int *err, void *data)
-{
-       struct filter_match_preds_data *d = data;
-
-       *err = 0;
-       switch (move) {
-       case MOVE_DOWN:
-               /* only AND and OR have children */
-               if (pred->left != FILTER_PRED_INVALID) {
-                       /* If ops is set, then it was folded. */
-                       if (!pred->ops)
-                               return WALK_PRED_DEFAULT;
-                       /* We can treat folded ops as a leaf node */
-                       d->match = process_ops(d->preds, pred, d->rec);
-               } else {
-                       if (!WARN_ON_ONCE(!pred->fn))
-                               d->match = pred->fn(pred, d->rec);
-               }
-
-               return WALK_PRED_PARENT;
-       case MOVE_UP_FROM_LEFT:
-               /*
-                * Check for short circuits.
-                *
-                * Optimization: !!match == (pred->op == OP_OR)
-                *   is the same as:
-                * if ((match && pred->op == OP_OR) ||
-                *     (!match && pred->op == OP_AND))
-                */
-               if (!!d->match == (pred->op == OP_OR))
-                       return WALK_PRED_PARENT;
-               break;
-       case MOVE_UP_FROM_RIGHT:
-               break;
-       }
-
-       return WALK_PRED_DEFAULT;
 }
 
 /* return 1 if event matches, 0 otherwise (discard) */
 int filter_match_preds(struct event_filter *filter, void *rec)
 {
-       struct filter_pred *preds;
-       struct filter_pred *root;
-       struct filter_match_preds_data data = {
-               /* match is currently meaningless */
-               .match = -1,
-               .rec   = rec,
-       };
-       int n_preds, ret;
+       struct prog_entry *prog;
+       int i;
 
        /* no filter is considered a match */
        if (!filter)
                return 1;
 
-       n_preds = filter->n_preds;
-       if (!n_preds)
-               return 1;
-
-       /*
-        * n_preds, root and filter->preds are protect with preemption disabled.
-        */
-       root = rcu_dereference_sched(filter->root);
-       if (!root)
+       prog = rcu_dereference_sched(filter->prog);
+       if (!prog)
                return 1;
 
-       data.preds = preds = rcu_dereference_sched(filter->preds);
-       ret = walk_pred_tree(preds, root, filter_match_preds_cb, &data);
-       WARN_ON(ret);
-       return data.match;
+       for (i = 0; prog[i].pred; i++) {
+               struct filter_pred *pred = prog[i].pred;
+               int match = pred->fn(pred, rec);
+               if (match == prog[i].when_to_branch)
+                       i = prog[i].target;
+       }
+       return prog[i].target;
 }
 EXPORT_SYMBOL_GPL(filter_match_preds);
 
-static void parse_error(struct filter_parse_state *ps, int err, int pos)
-{
-       ps->lasterr = err;
-       ps->lasterr_pos = pos;
-}
-
 static void remove_filter_string(struct event_filter *filter)
 {
        if (!filter)
@@ -664,57 +910,44 @@ static void remove_filter_string(struct event_filter *filter)
        filter->filter_string = NULL;
 }
 
-static int replace_filter_string(struct event_filter *filter,
-                                char *filter_string)
-{
-       kfree(filter->filter_string);
-       filter->filter_string = kstrdup(filter_string, GFP_KERNEL);
-       if (!filter->filter_string)
-               return -ENOMEM;
-
-       return 0;
-}
-
-static int append_filter_string(struct event_filter *filter,
-                               char *string)
-{
-       int newlen;
-       char *new_filter_string;
-
-       BUG_ON(!filter->filter_string);
-       newlen = strlen(filter->filter_string) + strlen(string) + 1;
-       new_filter_string = kmalloc(newlen, GFP_KERNEL);
-       if (!new_filter_string)
-               return -ENOMEM;
-
-       strcpy(new_filter_string, filter->filter_string);
-       strcat(new_filter_string, string);
-       kfree(filter->filter_string);
-       filter->filter_string = new_filter_string;
-
-       return 0;
-}
-
-static void append_filter_err(struct filter_parse_state *ps,
+static void append_filter_err(struct filter_parse_error *pe,
                              struct event_filter *filter)
 {
-       int pos = ps->lasterr_pos;
-       char *buf, *pbuf;
+       struct trace_seq *s;
+       int pos = pe->lasterr_pos;
+       char *buf;
+       int len;
+
+       if (WARN_ON(!filter->filter_string))
+               return;
 
-       buf = (char *)__get_free_page(GFP_KERNEL);
-       if (!buf)
+       s = kmalloc(sizeof(*s), GFP_KERNEL);
+       if (!s)
                return;
+       trace_seq_init(s);
+
+       len = strlen(filter->filter_string);
+       if (pos > len)
+               pos = len;
 
-       append_filter_string(filter, "\n");
-       memset(buf, ' ', PAGE_SIZE);
-       if (pos > PAGE_SIZE - 128)
-               pos = 0;
-       buf[pos] = '^';
-       pbuf = &buf[pos] + 1;
+       /* indexing is off by one */
+       if (pos)
+               pos++;
 
-       sprintf(pbuf, "\nparse_error: %s\n", err_text[ps->lasterr]);
-       append_filter_string(filter, buf);
-       free_page((unsigned long) buf);
+       trace_seq_puts(s, filter->filter_string);
+       if (pe->lasterr > 0) {
+               trace_seq_printf(s, "\n%*s", pos, "^");
+               trace_seq_printf(s, "\nparse_error: %s\n", err_text[pe->lasterr]);
+       } else {
+               trace_seq_printf(s, "\nError: (%d)\n", pe->lasterr);
+       }
+       trace_seq_putc(s, 0);
+       buf = kmemdup_nul(s->buffer, s->seq.len, GFP_KERNEL);
+       if (buf) {
+               kfree(filter->filter_string);
+               filter->filter_string = buf;
+       }
+       kfree(s);
 }
 
 static inline struct event_filter *event_filter(struct trace_event_file *file)
@@ -747,166 +980,44 @@ void print_subsystem_event_filter(struct event_subsystem *system,
        mutex_unlock(&event_mutex);
 }
 
-static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
+static void free_prog(struct event_filter *filter)
 {
-       stack->preds = kcalloc(n_preds + 1, sizeof(*stack->preds), GFP_KERNEL);
-       if (!stack->preds)
-               return -ENOMEM;
-       stack->index = n_preds;
-       return 0;
-}
+       struct prog_entry *prog;
+       int i;
 
-static void __free_pred_stack(struct pred_stack *stack)
-{
-       kfree(stack->preds);
-       stack->index = 0;
+       prog = rcu_access_pointer(filter->prog);
+       if (!prog)
+               return;
+
+       for (i = 0; prog[i].pred; i++)
+               kfree(prog[i].pred);
+       kfree(prog);
 }
 
-static int __push_pred_stack(struct pred_stack *stack,
-                            struct filter_pred *pred)
+static void filter_disable(struct trace_event_file *file)
 {
-       int index = stack->index;
+       unsigned long old_flags = file->flags;
 
-       if (WARN_ON(index == 0))
-               return -ENOSPC;
+       file->flags &= ~EVENT_FILE_FL_FILTERED;
 
-       stack->preds[--index] = pred;
-       stack->index = index;
-       return 0;
+       if (old_flags != file->flags)
+               trace_buffered_event_disable();
 }
 
-static struct filter_pred *
-__pop_pred_stack(struct pred_stack *stack)
+static void __free_filter(struct event_filter *filter)
 {
-       struct filter_pred *pred;
-       int index = stack->index;
-
-       pred = stack->preds[index++];
-       if (!pred)
-               return NULL;
+       if (!filter)
+               return;
 
-       stack->index = index;
-       return pred;
+       free_prog(filter);
+       kfree(filter->filter_string);
+       kfree(filter);
 }
 
-static int filter_set_pred(struct event_filter *filter,
-                          int idx,
-                          struct pred_stack *stack,
-                          struct filter_pred *src)
+void free_event_filter(struct event_filter *filter)
 {
-       struct filter_pred *dest = &filter->preds[idx];
-       struct filter_pred *left;
-       struct filter_pred *right;
-
-       *dest = *src;
-       dest->index = idx;
-
-       if (dest->op == OP_OR || dest->op == OP_AND) {
-               right = __pop_pred_stack(stack);
-               left = __pop_pred_stack(stack);
-               if (!left || !right)
-                       return -EINVAL;
-               /*
-                * If both children can be folded
-                * and they are the same op as this op or a leaf,
-                * then this op can be folded.
-                */
-               if (left->index & FILTER_PRED_FOLD &&
-                   ((left->op == dest->op && !left->not) ||
-                    left->left == FILTER_PRED_INVALID) &&
-                   right->index & FILTER_PRED_FOLD &&
-                   ((right->op == dest->op && !right->not) ||
-                    right->left == FILTER_PRED_INVALID))
-                       dest->index |= FILTER_PRED_FOLD;
-
-               dest->left = left->index & ~FILTER_PRED_FOLD;
-               dest->right = right->index & ~FILTER_PRED_FOLD;
-               left->parent = dest->index & ~FILTER_PRED_FOLD;
-               right->parent = dest->index | FILTER_PRED_IS_RIGHT;
-       } else {
-               /*
-                * Make dest->left invalid to be used as a quick
-                * way to know this is a leaf node.
-                */
-               dest->left = FILTER_PRED_INVALID;
-
-               /* All leafs allow folding the parent ops. */
-               dest->index |= FILTER_PRED_FOLD;
-       }
-
-       return __push_pred_stack(stack, dest);
-}
-
-static void __free_preds(struct event_filter *filter)
-{
-       int i;
-
-       if (filter->preds) {
-               for (i = 0; i < filter->n_preds; i++)
-                       kfree(filter->preds[i].ops);
-               kfree(filter->preds);
-               filter->preds = NULL;
-       }
-       filter->a_preds = 0;
-       filter->n_preds = 0;
-}
-
-static void filter_disable(struct trace_event_file *file)
-{
-       unsigned long old_flags = file->flags;
-
-       file->flags &= ~EVENT_FILE_FL_FILTERED;
-
-       if (old_flags != file->flags)
-               trace_buffered_event_disable();
-}
-
-static void __free_filter(struct event_filter *filter)
-{
-       if (!filter)
-               return;
-
-       __free_preds(filter);
-       kfree(filter->filter_string);
-       kfree(filter);
-}
-
-void free_event_filter(struct event_filter *filter)
-{
-       __free_filter(filter);
-}
-
-static struct event_filter *__alloc_filter(void)
-{
-       struct event_filter *filter;
-
-       filter = kzalloc(sizeof(*filter), GFP_KERNEL);
-       return filter;
-}
-
-static int __alloc_preds(struct event_filter *filter, int n_preds)
-{
-       struct filter_pred *pred;
-       int i;
-
-       if (filter->preds)
-               __free_preds(filter);
-
-       filter->preds = kcalloc(n_preds, sizeof(*filter->preds), GFP_KERNEL);
-
-       if (!filter->preds)
-               return -ENOMEM;
-
-       filter->a_preds = n_preds;
-       filter->n_preds = 0;
-
-       for (i = 0; i < n_preds; i++) {
-               pred = &filter->preds[i];
-               pred->fn = filter_pred_none;
-       }
-
-       return 0;
-}
+       __free_filter(filter);
+}
 
 static inline void __remove_filter(struct trace_event_file *file)
 {
@@ -937,800 +1048,467 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir,
 {
        struct trace_event_file *file;
 
-       list_for_each_entry(file, &tr->events, list) {
-               if (file->system != dir)
-                       continue;
-               __free_subsystem_filter(file);
-       }
-}
-
-static int filter_add_pred(struct filter_parse_state *ps,
-                          struct event_filter *filter,
-                          struct filter_pred *pred,
-                          struct pred_stack *stack)
-{
-       int err;
-
-       if (WARN_ON(filter->n_preds == filter->a_preds)) {
-               parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
-               return -ENOSPC;
-       }
-
-       err = filter_set_pred(filter, filter->n_preds, stack, pred);
-       if (err)
-               return err;
-
-       filter->n_preds++;
-
-       return 0;
-}
-
-int filter_assign_type(const char *type)
-{
-       if (strstr(type, "__data_loc") && strstr(type, "char"))
-               return FILTER_DYN_STRING;
-
-       if (strchr(type, '[') && strstr(type, "char"))
-               return FILTER_STATIC_STRING;
-
-       return FILTER_OTHER;
-}
-
-static bool is_legal_op(struct ftrace_event_field *field, enum filter_op_ids op)
-{
-       if (is_string_field(field) &&
-           (op != OP_EQ && op != OP_NE && op != OP_GLOB))
-               return false;
-       if (!is_string_field(field) && op == OP_GLOB)
-               return false;
-
-       return true;
-}
-
-static filter_pred_fn_t select_comparison_fn(enum filter_op_ids op,
-                                           int field_size, int field_is_signed)
-{
-       filter_pred_fn_t fn = NULL;
-
-       switch (field_size) {
-       case 8:
-               if (op == OP_EQ || op == OP_NE)
-                       fn = filter_pred_64;
-               else if (field_is_signed)
-                       fn = pred_funcs_s64[op - PRED_FUNC_START];
-               else
-                       fn = pred_funcs_u64[op - PRED_FUNC_START];
-               break;
-       case 4:
-               if (op == OP_EQ || op == OP_NE)
-                       fn = filter_pred_32;
-               else if (field_is_signed)
-                       fn = pred_funcs_s32[op - PRED_FUNC_START];
-               else
-                       fn = pred_funcs_u32[op - PRED_FUNC_START];
-               break;
-       case 2:
-               if (op == OP_EQ || op == OP_NE)
-                       fn = filter_pred_16;
-               else if (field_is_signed)
-                       fn = pred_funcs_s16[op - PRED_FUNC_START];
-               else
-                       fn = pred_funcs_u16[op - PRED_FUNC_START];
-               break;
-       case 1:
-               if (op == OP_EQ || op == OP_NE)
-                       fn = filter_pred_8;
-               else if (field_is_signed)
-                       fn = pred_funcs_s8[op - PRED_FUNC_START];
-               else
-                       fn = pred_funcs_u8[op - PRED_FUNC_START];
-               break;
-       }
-
-       return fn;
-}
-
-static int init_pred(struct filter_parse_state *ps,
-                    struct ftrace_event_field *field,
-                    struct filter_pred *pred)
-
-{
-       filter_pred_fn_t fn = filter_pred_none;
-       unsigned long long val;
-       int ret;
-
-       pred->offset = field->offset;
-
-       if (!is_legal_op(field, pred->op)) {
-               parse_error(ps, FILT_ERR_ILLEGAL_FIELD_OP, 0);
-               return -EINVAL;
-       }
-
-       if (field->filter_type == FILTER_COMM) {
-               filter_build_regex(pred);
-               fn = filter_pred_comm;
-               pred->regex.field_len = TASK_COMM_LEN;
-       } else if (is_string_field(field)) {
-               filter_build_regex(pred);
-
-               if (field->filter_type == FILTER_STATIC_STRING) {
-                       fn = filter_pred_string;
-                       pred->regex.field_len = field->size;
-               } else if (field->filter_type == FILTER_DYN_STRING)
-                       fn = filter_pred_strloc;
-               else
-                       fn = filter_pred_pchar;
-       } else if (is_function_field(field)) {
-               if (strcmp(field->name, "ip")) {
-                       parse_error(ps, FILT_ERR_IP_FIELD_ONLY, 0);
-                       return -EINVAL;
-               }
-       } else {
-               if (field->is_signed)
-                       ret = kstrtoll(pred->regex.pattern, 0, &val);
-               else
-                       ret = kstrtoull(pred->regex.pattern, 0, &val);
-               if (ret) {
-                       parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
-                       return -EINVAL;
-               }
-               pred->val = val;
-
-               if (field->filter_type == FILTER_CPU)
-                       fn = filter_pred_cpu;
-               else
-                       fn = select_comparison_fn(pred->op, field->size,
-                                         field->is_signed);
-               if (!fn) {
-                       parse_error(ps, FILT_ERR_INVALID_OP, 0);
-                       return -EINVAL;
-               }
-       }
-
-       if (pred->op == OP_NE)
-               pred->not ^= 1;
-
-       pred->fn = fn;
-       return 0;
-}
-
-static void parse_init(struct filter_parse_state *ps,
-                      struct filter_op *ops,
-                      char *infix_string)
-{
-       memset(ps, '\0', sizeof(*ps));
-
-       ps->infix.string = infix_string;
-       ps->infix.cnt = strlen(infix_string);
-       ps->ops = ops;
-
-       INIT_LIST_HEAD(&ps->opstack);
-       INIT_LIST_HEAD(&ps->postfix);
-}
-
-static char infix_next(struct filter_parse_state *ps)
-{
-       if (!ps->infix.cnt)
-               return 0;
-
-       ps->infix.cnt--;
-
-       return ps->infix.string[ps->infix.tail++];
-}
-
-static char infix_peek(struct filter_parse_state *ps)
-{
-       if (ps->infix.tail == strlen(ps->infix.string))
-               return 0;
-
-       return ps->infix.string[ps->infix.tail];
-}
-
-static void infix_advance(struct filter_parse_state *ps)
-{
-       if (!ps->infix.cnt)
-               return;
-
-       ps->infix.cnt--;
-       ps->infix.tail++;
-}
-
-static inline int is_precedence_lower(struct filter_parse_state *ps,
-                                     int a, int b)
-{
-       return ps->ops[a].precedence < ps->ops[b].precedence;
-}
-
-static inline int is_op_char(struct filter_parse_state *ps, char c)
-{
-       int i;
-
-       for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
-               if (ps->ops[i].string[0] == c)
-                       return 1;
-       }
-
-       return 0;
-}
-
-static int infix_get_op(struct filter_parse_state *ps, char firstc)
-{
-       char nextc = infix_peek(ps);
-       char opstr[3];
-       int i;
-
-       opstr[0] = firstc;
-       opstr[1] = nextc;
-       opstr[2] = '\0';
-
-       for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
-               if (!strcmp(opstr, ps->ops[i].string)) {
-                       infix_advance(ps);
-                       return ps->ops[i].id;
-               }
-       }
-
-       opstr[1] = '\0';
-
-       for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
-               if (!strcmp(opstr, ps->ops[i].string))
-                       return ps->ops[i].id;
-       }
-
-       return OP_NONE;
-}
-
-static inline void clear_operand_string(struct filter_parse_state *ps)
-{
-       memset(ps->operand.string, '\0', MAX_FILTER_STR_VAL);
-       ps->operand.tail = 0;
-}
-
-static inline int append_operand_char(struct filter_parse_state *ps, char c)
-{
-       if (ps->operand.tail == MAX_FILTER_STR_VAL - 1)
-               return -EINVAL;
-
-       ps->operand.string[ps->operand.tail++] = c;
-
-       return 0;
-}
-
-static int filter_opstack_push(struct filter_parse_state *ps,
-                              enum filter_op_ids op)
-{
-       struct opstack_op *opstack_op;
-
-       opstack_op = kmalloc(sizeof(*opstack_op), GFP_KERNEL);
-       if (!opstack_op)
-               return -ENOMEM;
-
-       opstack_op->op = op;
-       list_add(&opstack_op->list, &ps->opstack);
-
-       return 0;
-}
-
-static int filter_opstack_empty(struct filter_parse_state *ps)
-{
-       return list_empty(&ps->opstack);
-}
-
-static int filter_opstack_top(struct filter_parse_state *ps)
-{
-       struct opstack_op *opstack_op;
-
-       if (filter_opstack_empty(ps))
-               return OP_NONE;
-
-       opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list);
-
-       return opstack_op->op;
-}
-
-static int filter_opstack_pop(struct filter_parse_state *ps)
-{
-       struct opstack_op *opstack_op;
-       enum filter_op_ids op;
-
-       if (filter_opstack_empty(ps))
-               return OP_NONE;
-
-       opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list);
-       op = opstack_op->op;
-       list_del(&opstack_op->list);
-
-       kfree(opstack_op);
-
-       return op;
-}
-
-static void filter_opstack_clear(struct filter_parse_state *ps)
-{
-       while (!filter_opstack_empty(ps))
-               filter_opstack_pop(ps);
-}
-
-static char *curr_operand(struct filter_parse_state *ps)
-{
-       return ps->operand.string;
-}
-
-static int postfix_append_operand(struct filter_parse_state *ps, char *operand)
-{
-       struct postfix_elt *elt;
-
-       elt = kmalloc(sizeof(*elt), GFP_KERNEL);
-       if (!elt)
-               return -ENOMEM;
-
-       elt->op = OP_NONE;
-       elt->operand = kstrdup(operand, GFP_KERNEL);
-       if (!elt->operand) {
-               kfree(elt);
-               return -ENOMEM;
-       }
-
-       list_add_tail(&elt->list, &ps->postfix);
-
-       return 0;
-}
-
-static int postfix_append_op(struct filter_parse_state *ps, enum filter_op_ids op)
-{
-       struct postfix_elt *elt;
-
-       elt = kmalloc(sizeof(*elt), GFP_KERNEL);
-       if (!elt)
-               return -ENOMEM;
-
-       elt->op = op;
-       elt->operand = NULL;
-
-       list_add_tail(&elt->list, &ps->postfix);
-
-       return 0;
-}
-
-static void postfix_clear(struct filter_parse_state *ps)
-{
-       struct postfix_elt *elt;
-
-       while (!list_empty(&ps->postfix)) {
-               elt = list_first_entry(&ps->postfix, struct postfix_elt, list);
-               list_del(&elt->list);
-               kfree(elt->operand);
-               kfree(elt);
-       }
-}
-
-static int filter_parse(struct filter_parse_state *ps)
-{
-       enum filter_op_ids op, top_op;
-       int in_string = 0;
-       char ch;
-
-       while ((ch = infix_next(ps))) {
-               if (ch == '"') {
-                       in_string ^= 1;
-                       continue;
-               }
-
-               if (in_string)
-                       goto parse_operand;
-
-               if (isspace(ch))
-                       continue;
-
-               if (is_op_char(ps, ch)) {
-                       op = infix_get_op(ps, ch);
-                       if (op == OP_NONE) {
-                               parse_error(ps, FILT_ERR_INVALID_OP, 0);
-                               return -EINVAL;
-                       }
-
-                       if (strlen(curr_operand(ps))) {
-                               postfix_append_operand(ps, curr_operand(ps));
-                               clear_operand_string(ps);
-                       }
-
-                       while (!filter_opstack_empty(ps)) {
-                               top_op = filter_opstack_top(ps);
-                               if (!is_precedence_lower(ps, top_op, op)) {
-                                       top_op = filter_opstack_pop(ps);
-                                       postfix_append_op(ps, top_op);
-                                       continue;
-                               }
-                               break;
-                       }
-
-                       filter_opstack_push(ps, op);
-                       continue;
-               }
-
-               if (ch == '(') {
-                       filter_opstack_push(ps, OP_OPEN_PAREN);
-                       continue;
-               }
-
-               if (ch == ')') {
-                       if (strlen(curr_operand(ps))) {
-                               postfix_append_operand(ps, curr_operand(ps));
-                               clear_operand_string(ps);
-                       }
-
-                       top_op = filter_opstack_pop(ps);
-                       while (top_op != OP_NONE) {
-                               if (top_op == OP_OPEN_PAREN)
-                                       break;
-                               postfix_append_op(ps, top_op);
-                               top_op = filter_opstack_pop(ps);
-                       }
-                       if (top_op == OP_NONE) {
-                               parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
-                               return -EINVAL;
-                       }
-                       continue;
-               }
-parse_operand:
-               if (append_operand_char(ps, ch)) {
-                       parse_error(ps, FILT_ERR_OPERAND_TOO_LONG, 0);
-                       return -EINVAL;
-               }
-       }
-
-       if (strlen(curr_operand(ps)))
-               postfix_append_operand(ps, curr_operand(ps));
-
-       while (!filter_opstack_empty(ps)) {
-               top_op = filter_opstack_pop(ps);
-               if (top_op == OP_NONE)
-                       break;
-               if (top_op == OP_OPEN_PAREN) {
-                       parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
-                       return -EINVAL;
-               }
-               postfix_append_op(ps, top_op);
+       list_for_each_entry(file, &tr->events, list) {
+               if (file->system != dir)
+                       continue;
+               __free_subsystem_filter(file);
        }
-
-       return 0;
 }
 
-static struct filter_pred *create_pred(struct filter_parse_state *ps,
-                                      struct trace_event_call *call,
-                                      enum filter_op_ids op,
-                                      char *operand1, char *operand2)
+int filter_assign_type(const char *type)
 {
-       struct ftrace_event_field *field;
-       static struct filter_pred pred;
+       if (strstr(type, "__data_loc") && strstr(type, "char"))
+               return FILTER_DYN_STRING;
+
+       if (strchr(type, '[') && strstr(type, "char"))
+               return FILTER_STATIC_STRING;
 
-       memset(&pred, 0, sizeof(pred));
-       pred.op = op;
+       return FILTER_OTHER;
+}
 
-       if (op == OP_AND || op == OP_OR)
-               return &pred;
+static filter_pred_fn_t select_comparison_fn(enum filter_op_ids op,
+                                           int field_size, int field_is_signed)
+{
+       filter_pred_fn_t fn = NULL;
+       int pred_func_index = -1;
 
-       if (!operand1 || !operand2) {
-               parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
-               return NULL;
+       switch (op) {
+       case OP_EQ:
+       case OP_NE:
+               break;
+       default:
+               if (WARN_ON_ONCE(op < PRED_FUNC_START))
+                       return NULL;
+               pred_func_index = op - PRED_FUNC_START;
+               if (WARN_ON_ONCE(pred_func_index > PRED_FUNC_MAX))
+                       return NULL;
        }
 
-       field = trace_find_event_field(call, operand1);
-       if (!field) {
-               parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0);
-               return NULL;
+       switch (field_size) {
+       case 8:
+               if (pred_func_index < 0)
+                       fn = filter_pred_64;
+               else if (field_is_signed)
+                       fn = pred_funcs_s64[pred_func_index];
+               else
+                       fn = pred_funcs_u64[pred_func_index];
+               break;
+       case 4:
+               if (pred_func_index < 0)
+                       fn = filter_pred_32;
+               else if (field_is_signed)
+                       fn = pred_funcs_s32[pred_func_index];
+               else
+                       fn = pred_funcs_u32[pred_func_index];
+               break;
+       case 2:
+               if (pred_func_index < 0)
+                       fn = filter_pred_16;
+               else if (field_is_signed)
+                       fn = pred_funcs_s16[pred_func_index];
+               else
+                       fn = pred_funcs_u16[pred_func_index];
+               break;
+       case 1:
+               if (pred_func_index < 0)
+                       fn = filter_pred_8;
+               else if (field_is_signed)
+                       fn = pred_funcs_s8[pred_func_index];
+               else
+                       fn = pred_funcs_u8[pred_func_index];
+               break;
        }
 
-       strcpy(pred.regex.pattern, operand2);
-       pred.regex.len = strlen(pred.regex.pattern);
-       pred.field = field;
-       return init_pred(ps, field, &pred) ? NULL : &pred;
+       return fn;
 }
 
-static int check_preds(struct filter_parse_state *ps)
+/* Called when a predicate is encountered by predicate_parse() */
+static int parse_pred(const char *str, void *data,
+                     int pos, struct filter_parse_error *pe,
+                     struct filter_pred **pred_ptr)
 {
-       int n_normal_preds = 0, n_logical_preds = 0;
-       struct postfix_elt *elt;
-       int cnt = 0;
+       struct trace_event_call *call = data;
+       struct ftrace_event_field *field;
+       struct filter_pred *pred = NULL;
+       char num_buf[24];       /* Big enough to hold an address */
+       char *field_name;
+       char q;
+       u64 val;
+       int len;
+       int ret;
+       int op;
+       int s;
+       int i = 0;
 
-       list_for_each_entry(elt, &ps->postfix, list) {
-               if (elt->op == OP_NONE) {
-                       cnt++;
-                       continue;
-               }
+       /* First find the field to associate to */
+       while (isspace(str[i]))
+               i++;
+       s = i;
 
-               if (elt->op == OP_AND || elt->op == OP_OR) {
-                       n_logical_preds++;
-                       cnt--;
-                       continue;
-               }
-               if (elt->op != OP_NOT)
-                       cnt--;
-               n_normal_preds++;
-               /* all ops should have operands */
-               if (cnt < 0)
-                       break;
-       }
+       while (isalnum(str[i]) || str[i] == '_')
+               i++;
+
+       len = i - s;
+
+       if (!len)
+               return -1;
 
-       if (cnt != 1 || !n_normal_preds || n_logical_preds >= n_normal_preds) {
-               parse_error(ps, FILT_ERR_INVALID_FILTER, 0);
+       field_name = kmemdup_nul(str + s, len, GFP_KERNEL);
+       if (!field_name)
+               return -ENOMEM;
+
+       /* Make sure that the field exists */
+
+       field = trace_find_event_field(call, field_name);
+       kfree(field_name);
+       if (!field) {
+               parse_error(pe, FILT_ERR_FIELD_NOT_FOUND, pos + i);
                return -EINVAL;
        }
 
-       return 0;
-}
+       while (isspace(str[i]))
+               i++;
 
-static int count_preds(struct filter_parse_state *ps)
-{
-       struct postfix_elt *elt;
-       int n_preds = 0;
+       /* Make sure this op is supported */
+       for (op = 0; ops[op]; op++) {
+               /* This is why '<=' must come before '<' in ops[] */
+               if (strncmp(str + i, ops[op], strlen(ops[op])) == 0)
+                       break;
+       }
 
-       list_for_each_entry(elt, &ps->postfix, list) {
-               if (elt->op == OP_NONE)
-                       continue;
-               n_preds++;
+       if (!ops[op]) {
+               parse_error(pe, FILT_ERR_INVALID_OP, pos + i);
+               goto err_free;
        }
 
-       return n_preds;
-}
+       i += strlen(ops[op]);
 
-struct check_pred_data {
-       int count;
-       int max;
-};
+       while (isspace(str[i]))
+               i++;
 
-static int check_pred_tree_cb(enum move_type move, struct filter_pred *pred,
-                             int *err, void *data)
-{
-       struct check_pred_data *d = data;
+       s = i;
 
-       if (WARN_ON(d->count++ > d->max)) {
-               *err = -EINVAL;
-               return WALK_PRED_ABORT;
-       }
-       return WALK_PRED_DEFAULT;
-}
+       pred = kzalloc(sizeof(*pred), GFP_KERNEL);
+       if (!pred)
+               return -ENOMEM;
 
-/*
- * The tree is walked at filtering of an event. If the tree is not correctly
- * built, it may cause an infinite loop. Check here that the tree does
- * indeed terminate.
- */
-static int check_pred_tree(struct event_filter *filter,
-                          struct filter_pred *root)
-{
-       struct check_pred_data data = {
+       pred->field = field;
+       pred->offset = field->offset;
+       pred->op = op;
+
+       if (ftrace_event_is_function(call)) {
                /*
-                * The max that we can hit a node is three times.
-                * Once going down, once coming up from left, and
-                * once coming up from right. This is more than enough
-                * since leafs are only hit a single time.
+                * Perf does things differently with function events.
+                * It only allows an "ip" field, and expects a string.
+                * But the string does not need to be surrounded by quotes.
+                * If it is a string, the assigned function is a nop,
+                * (perf doesn't use it) and grab everything.
                 */
-               .max   = 3 * filter->n_preds,
-               .count = 0,
-       };
+               if (strcmp(field->name, "ip") != 0) {
+                        parse_error(pe, FILT_ERR_IP_FIELD_ONLY, pos + i);
+                        goto err_free;
+                }
+                pred->fn = filter_pred_none;
+
+                /*
+                 * Quotes are not required, but if they exist then we need
+                 * to read them till we hit a matching one.
+                 */
+                if (str[i] == '\'' || str[i] == '"')
+                        q = str[i];
+                else
+                        q = 0;
+
+                for (i++; str[i]; i++) {
+                        if (q && str[i] == q)
+                                break;
+                        if (!q && (str[i] == ')' || str[i] == '&' ||
+                                   str[i] == '|'))
+                                break;
+                }
+                /* Skip quotes */
+                if (q)
+                        s++;
+               len = i - s;
+               if (len >= MAX_FILTER_STR_VAL) {
+                       parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i);
+                       goto err_free;
+               }
 
-       return walk_pred_tree(filter->preds, root,
-                             check_pred_tree_cb, &data);
-}
+               pred->regex.len = len;
+               strncpy(pred->regex.pattern, str + s, len);
+               pred->regex.pattern[len] = 0;
+
+       /* This is either a string, or an integer */
+       } else if (str[i] == '\'' || str[i] == '"') {
+               char q = str[i];
+
+               /* Make sure the op is OK for strings */
+               switch (op) {
+               case OP_NE:
+                       pred->not = 1;
+                       /* Fall through */
+               case OP_GLOB:
+               case OP_EQ:
+                       break;
+               default:
+                       parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
+                       goto err_free;
+               }
 
-static int count_leafs_cb(enum move_type move, struct filter_pred *pred,
-                         int *err, void *data)
-{
-       int *count = data;
+               /* Make sure the field is OK for strings */
+               if (!is_string_field(field)) {
+                       parse_error(pe, FILT_ERR_EXPECT_DIGIT, pos + i);
+                       goto err_free;
+               }
 
-       if ((move == MOVE_DOWN) &&
-           (pred->left == FILTER_PRED_INVALID))
-               (*count)++;
+               for (i++; str[i]; i++) {
+                       if (str[i] == q)
+                               break;
+               }
+               if (!str[i]) {
+                       parse_error(pe, FILT_ERR_MISSING_QUOTE, pos + i);
+                       goto err_free;
+               }
 
-       return WALK_PRED_DEFAULT;
-}
+               /* Skip quotes */
+               s++;
+               len = i - s;
+               if (len >= MAX_FILTER_STR_VAL) {
+                       parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i);
+                       goto err_free;
+               }
 
-static int count_leafs(struct filter_pred *preds, struct filter_pred *root)
-{
-       int count = 0, ret;
+               pred->regex.len = len;
+               strncpy(pred->regex.pattern, str + s, len);
+               pred->regex.pattern[len] = 0;
 
-       ret = walk_pred_tree(preds, root, count_leafs_cb, &count);
-       WARN_ON(ret);
-       return count;
-}
+               filter_build_regex(pred);
 
-struct fold_pred_data {
-       struct filter_pred *root;
-       int count;
-       int children;
-};
+               if (field->filter_type == FILTER_COMM) {
+                       pred->fn = filter_pred_comm;
 
-static int fold_pred_cb(enum move_type move, struct filter_pred *pred,
-                       int *err, void *data)
-{
-       struct fold_pred_data *d = data;
-       struct filter_pred *root = d->root;
+               } else if (field->filter_type == FILTER_STATIC_STRING) {
+                       pred->fn = filter_pred_string;
+                       pred->regex.field_len = field->size;
 
-       if (move != MOVE_DOWN)
-               return WALK_PRED_DEFAULT;
-       if (pred->left != FILTER_PRED_INVALID)
-               return WALK_PRED_DEFAULT;
+               } else if (field->filter_type == FILTER_DYN_STRING)
+                       pred->fn = filter_pred_strloc;
+               else
+                       pred->fn = filter_pred_pchar;
+               /* go past the last quote */
+               i++;
 
-       if (WARN_ON(d->count == d->children)) {
-               *err = -EINVAL;
-               return WALK_PRED_ABORT;
-       }
+       } else if (isdigit(str[i])) {
 
-       pred->index &= ~FILTER_PRED_FOLD;
-       root->ops[d->count++] = pred->index;
-       return WALK_PRED_DEFAULT;
-}
+               /* Make sure the field is not a string */
+               if (is_string_field(field)) {
+                       parse_error(pe, FILT_ERR_EXPECT_STRING, pos + i);
+                       goto err_free;
+               }
 
-static int fold_pred(struct filter_pred *preds, struct filter_pred *root)
-{
-       struct fold_pred_data data = {
-               .root  = root,
-               .count = 0,
-       };
-       int children;
+               if (op == OP_GLOB) {
+                       parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
+                       goto err_free;
+               }
 
-       /* No need to keep the fold flag */
-       root->index &= ~FILTER_PRED_FOLD;
+               /* We allow 0xDEADBEEF */
+               while (isalnum(str[i]))
+                       i++;
 
-       /* If the root is a leaf then do nothing */
-       if (root->left == FILTER_PRED_INVALID)
-               return 0;
+               len = i - s;
+               /* 0xfeedfacedeadbeef is 18 chars max */
+               if (len >= sizeof(num_buf)) {
+                       parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i);
+                       goto err_free;
+               }
 
-       /* count the children */
-       children = count_leafs(preds, &preds[root->left]);
-       children += count_leafs(preds, &preds[root->right]);
+               strncpy(num_buf, str + s, len);
+               num_buf[len] = 0;
 
-       root->ops = kcalloc(children, sizeof(*root->ops), GFP_KERNEL);
-       if (!root->ops)
-               return -ENOMEM;
+               /* Make sure it is a value */
+               if (field->is_signed)
+                       ret = kstrtoll(num_buf, 0, &val);
+               else
+                       ret = kstrtoull(num_buf, 0, &val);
+               if (ret) {
+                       parse_error(pe, FILT_ERR_ILLEGAL_INTVAL, pos + s);
+                       goto err_free;
+               }
 
-       root->val = children;
-       data.children = children;
-       return walk_pred_tree(preds, root, fold_pred_cb, &data);
-}
+               pred->val = val;
 
-static int fold_pred_tree_cb(enum move_type move, struct filter_pred *pred,
-                            int *err, void *data)
-{
-       struct filter_pred *preds = data;
+               if (field->filter_type == FILTER_CPU)
+                       pred->fn = filter_pred_cpu;
+               else {
+                       pred->fn = select_comparison_fn(pred->op, field->size,
+                                                       field->is_signed);
+                       if (pred->op == OP_NE)
+                               pred->not = 1;
+               }
 
-       if (move != MOVE_DOWN)
-               return WALK_PRED_DEFAULT;
-       if (!(pred->index & FILTER_PRED_FOLD))
-               return WALK_PRED_DEFAULT;
+       } else {
+               parse_error(pe, FILT_ERR_INVALID_VALUE, pos + i);
+               goto err_free;
+       }
 
-       *err = fold_pred(preds, pred);
-       if (*err)
-               return WALK_PRED_ABORT;
+       *pred_ptr = pred;
+       return i;
 
-       /* eveyrhing below is folded, continue with parent */
-       return WALK_PRED_PARENT;
+err_free:
+       kfree(pred);
+       return -EINVAL;
 }
 
+enum {
+       TOO_MANY_CLOSE          = -1,
+       TOO_MANY_OPEN           = -2,
+       MISSING_QUOTE           = -3,
+};
+
 /*
- * To optimize the processing of the ops, if we have several "ors" or
- * "ands" together, we can put them in an array and process them all
- * together speeding up the filter logic.
+ * Read the filter string once to calculate the number of predicates
+ * as well as how deep the parentheses go.
+ *
+ * Returns:
+ *   0 - everything is fine (err is undefined)
+ *  -1 - too many ')'
+ *  -2 - too many '('
+ *  -3 - No matching quote
  */
-static int fold_pred_tree(struct event_filter *filter,
-                          struct filter_pred *root)
-{
-       return walk_pred_tree(filter->preds, root, fold_pred_tree_cb,
-                             filter->preds);
-}
-
-static int replace_preds(struct trace_event_call *call,
-                        struct event_filter *filter,
-                        struct filter_parse_state *ps,
-                        bool dry_run)
-{
-       char *operand1 = NULL, *operand2 = NULL;
-       struct filter_pred *pred;
-       struct filter_pred *root;
-       struct postfix_elt *elt;
-       struct pred_stack stack = { }; /* init to NULL */
-       int err;
-       int n_preds = 0;
-
-       n_preds = count_preds(ps);
-       if (n_preds >= MAX_FILTER_PRED) {
-               parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
-               return -ENOSPC;
-       }
-
-       err = check_preds(ps);
-       if (err)
-               return err;
+static int calc_stack(const char *str, int *parens, int *preds, int *err)
+{
+       bool is_pred = false;
+       int nr_preds = 0;
+       int open = 1; /* Count the expression as "(E)" */
+       int last_quote = 0;
+       int max_open = 1;
+       int quote = 0;
+       int i;
 
-       if (!dry_run) {
-               err = __alloc_pred_stack(&stack, n_preds);
-               if (err)
-                       return err;
-               err = __alloc_preds(filter, n_preds);
-               if (err)
-                       goto fail;
-       }
+       *err = 0;
 
-       n_preds = 0;
-       list_for_each_entry(elt, &ps->postfix, list) {
-               if (elt->op == OP_NONE) {
-                       if (!operand1)
-                               operand1 = elt->operand;
-                       else if (!operand2)
-                               operand2 = elt->operand;
-                       else {
-                               parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0);
-                               err = -EINVAL;
-                               goto fail;
-                       }
+       for (i = 0; str[i]; i++) {
+               if (isspace(str[i]))
+                       continue;
+               if (quote) {
+                       if (str[i] == quote)
+                              quote = 0;
                        continue;
                }
 
-               if (elt->op == OP_NOT) {
-                       if (!n_preds || operand1 || operand2) {
-                               parse_error(ps, FILT_ERR_ILLEGAL_NOT_OP, 0);
-                               err = -EINVAL;
-                               goto fail;
+               switch (str[i]) {
+               case '\'':
+               case '"':
+                       quote = str[i];
+                       last_quote = i;
+                       break;
+               case '|':
+               case '&':
+                       if (str[i+1] != str[i])
+                               break;
+                       is_pred = false;
+                       continue;
+               case '(':
+                       is_pred = false;
+                       open++;
+                       if (open > max_open)
+                               max_open = open;
+                       continue;
+               case ')':
+                       is_pred = false;
+                       if (open == 1) {
+                               *err = i;
+                               return TOO_MANY_CLOSE;
                        }
-                       if (!dry_run)
-                               filter->preds[n_preds - 1].not ^= 1;
+                       open--;
                        continue;
                }
-
-               if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) {
-                       parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
-                       err = -ENOSPC;
-                       goto fail;
+               if (!is_pred) {
+                       nr_preds++;
+                       is_pred = true;
                }
+       }
 
-               pred = create_pred(ps, call, elt->op, operand1, operand2);
-               if (!pred) {
-                       err = -EINVAL;
-                       goto fail;
-               }
+       if (quote) {
+               *err = last_quote;
+               return MISSING_QUOTE;
+       }
 
-               if (!dry_run) {
-                       err = filter_add_pred(ps, filter, pred, &stack);
-                       if (err)
-                               goto fail;
-               }
+       if (open != 1) {
+               int level = open;
 
-               operand1 = operand2 = NULL;
+               /* find the bad open */
+               for (i--; i; i--) {
+                       if (quote) {
+                               if (str[i] == quote)
+                                       quote = 0;
+                               continue;
+                       }
+                       switch (str[i]) {
+                       case '(':
+                               if (level == open) {
+                                       *err = i;
+                                       return TOO_MANY_OPEN;
+                               }
+                               level--;
+                               break;
+                       case ')':
+                               level++;
+                               break;
+                       case '\'':
+                       case '"':
+                               quote = str[i];
+                               break;
+                       }
+               }
+               /* First character is the '(' with missing ')' */
+               *err = 0;
+               return TOO_MANY_OPEN;
        }
 
-       if (!dry_run) {
-               /* We should have one item left on the stack */
-               pred = __pop_pred_stack(&stack);
-               if (!pred)
-                       return -EINVAL;
-               /* This item is where we start from in matching */
-               root = pred;
-               /* Make sure the stack is empty */
-               pred = __pop_pred_stack(&stack);
-               if (WARN_ON(pred)) {
-                       err = -EINVAL;
-                       filter->root = NULL;
-                       goto fail;
+       /* Set the size of the required stacks */
+       *parens = max_open;
+       *preds = nr_preds;
+       return 0;
+}
+
+static int process_preds(struct trace_event_call *call,
+                        const char *filter_string,
+                        struct event_filter *filter,
+                        struct filter_parse_error *pe)
+{
+       struct prog_entry *prog;
+       int nr_parens;
+       int nr_preds;
+       int index;
+       int ret;
+
+       ret = calc_stack(filter_string, &nr_parens, &nr_preds, &index);
+       if (ret < 0) {
+               switch (ret) {
+               case MISSING_QUOTE:
+                       parse_error(pe, FILT_ERR_MISSING_QUOTE, index);
+                       break;
+               case TOO_MANY_OPEN:
+                       parse_error(pe, FILT_ERR_TOO_MANY_OPEN, index);
+                       break;
+               default:
+                       parse_error(pe, FILT_ERR_TOO_MANY_CLOSE, index);
                }
-               err = check_pred_tree(filter, root);
-               if (err)
-                       goto fail;
-
-               /* Optimize the tree */
-               err = fold_pred_tree(filter, root);
-               if (err)
-                       goto fail;
-
-               /* We don't set root until we know it works */
-               barrier();
-               filter->root = root;
+               return ret;
        }
 
-       err = 0;
-fail:
-       __free_pred_stack(&stack);
-       return err;
+       if (!nr_preds) {
+               prog = NULL;
+       } else {
+               prog = predicate_parse(filter_string, nr_parens, nr_preds,
+                              parse_pred, call, pe);
+               if (IS_ERR(prog))
+                       return PTR_ERR(prog);
+       }
+       rcu_assign_pointer(filter->prog, prog);
+       return 0;
 }
 
 static inline void event_set_filtered_flag(struct trace_event_file *file)
@@ -1780,72 +1558,53 @@ struct filter_list {
        struct event_filter     *filter;
 };
 
-static int replace_system_preds(struct trace_subsystem_dir *dir,
+static int process_system_preds(struct trace_subsystem_dir *dir,
                                struct trace_array *tr,
-                               struct filter_parse_state *ps,
+                               struct filter_parse_error *pe,
                                char *filter_string)
 {
        struct trace_event_file *file;
        struct filter_list *filter_item;
+       struct event_filter *filter = NULL;
        struct filter_list *tmp;
        LIST_HEAD(filter_list);
        bool fail = true;
        int err;
 
        list_for_each_entry(file, &tr->events, list) {
-               if (file->system != dir)
-                       continue;
-
-               /*
-                * Try to see if the filter can be applied
-                *  (filter arg is ignored on dry_run)
-                */
-               err = replace_preds(file->event_call, NULL, ps, true);
-               if (err)
-                       event_set_no_set_filter_flag(file);
-               else
-                       event_clear_no_set_filter_flag(file);
-       }
-
-       list_for_each_entry(file, &tr->events, list) {
-               struct event_filter *filter;
 
                if (file->system != dir)
                        continue;
 
-               if (event_no_set_filter_flag(file))
-                       continue;
-
-               filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
-               if (!filter_item)
-                       goto fail_mem;
-
-               list_add_tail(&filter_item->list, &filter_list);
-
-               filter_item->filter = __alloc_filter();
-               if (!filter_item->filter)
+               filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+               if (!filter)
                        goto fail_mem;
-               filter = filter_item->filter;
 
-               /* Can only fail on no memory */
-               err = replace_filter_string(filter, filter_string);
-               if (err)
+               filter->filter_string = kstrdup(filter_string, GFP_KERNEL);
+               if (!filter->filter_string)
                        goto fail_mem;
 
-               err = replace_preds(file->event_call, filter, ps, false);
+               err = process_preds(file->event_call, filter_string, filter, pe);
                if (err) {
                        filter_disable(file);
-                       parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
-                       append_filter_err(ps, filter);
+                       parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0);
+                       append_filter_err(pe, filter);
                } else
                        event_set_filtered_flag(file);
+
+
+               filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
+               if (!filter_item)
+                       goto fail_mem;
+
+               list_add_tail(&filter_item->list, &filter_list);
                /*
                 * Regardless of if this returned an error, we still
                 * replace the filter for the call.
                 */
-               filter = event_filter(file);
-               event_set_filter(file, filter_item->filter);
-               filter_item->filter = filter;
+               filter_item->filter = event_filter(file);
+               event_set_filter(file, filter);
+               filter = NULL;
 
                fail = false;
        }
@@ -1871,9 +1630,10 @@ static int replace_system_preds(struct trace_subsystem_dir *dir,
                list_del(&filter_item->list);
                kfree(filter_item);
        }
-       parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
+       parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0);
        return -EINVAL;
  fail_mem:
+       kfree(filter);
        /* If any call succeeded, we still need to sync */
        if (!fail)
                synchronize_sched();
@@ -1885,47 +1645,42 @@ static int replace_system_preds(struct trace_subsystem_dir *dir,
        return -ENOMEM;
 }
 
-static int create_filter_start(char *filter_str, bool set_str,
-                              struct filter_parse_state **psp,
+static int create_filter_start(char *filter_string, bool set_str,
+                              struct filter_parse_error **pse,
                               struct event_filter **filterp)
 {
        struct event_filter *filter;
-       struct filter_parse_state *ps = NULL;
+       struct filter_parse_error *pe = NULL;
        int err = 0;
 
-       WARN_ON_ONCE(*psp || *filterp);
+       if (WARN_ON_ONCE(*pse || *filterp))
+               return -EINVAL;
 
-       /* allocate everything, and if any fails, free all and fail */
-       filter = __alloc_filter();
-       if (filter && set_str)
-               err = replace_filter_string(filter, filter_str);
+       filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+       if (filter && set_str) {
+               filter->filter_string = kstrdup(filter_string, GFP_KERNEL);
+               if (!filter->filter_string)
+                       err = -ENOMEM;
+       }
 
-       ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+       pe = kzalloc(sizeof(*pe), GFP_KERNEL);
 
-       if (!filter || !ps || err) {
-               kfree(ps);
+       if (!filter || !pe || err) {
+               kfree(pe);
                __free_filter(filter);
                return -ENOMEM;
        }
 
        /* we're committed to creating a new filter */
        *filterp = filter;
-       *psp = ps;
+       *pse = pe;
 
-       parse_init(ps, filter_ops, filter_str);
-       err = filter_parse(ps);
-       if (err && set_str)
-               append_filter_err(ps, filter);
-       return err;
+       return 0;
 }
 
-static void create_filter_finish(struct filter_parse_state *ps)
+static void create_filter_finish(struct filter_parse_error *pe)
 {
-       if (ps) {
-               filter_opstack_clear(ps);
-               postfix_clear(ps);
-               kfree(ps);
-       }
+       kfree(pe);
 }
 
 /**
@@ -1945,24 +1700,20 @@ static void create_filter_finish(struct filter_parse_state *ps)
  * freeing it.
  */
 static int create_filter(struct trace_event_call *call,
-                        char *filter_str, bool set_str,
+                        char *filter_string, bool set_str,
                         struct event_filter **filterp)
 {
+       struct filter_parse_error *pe = NULL;
        struct event_filter *filter = NULL;
-       struct filter_parse_state *ps = NULL;
        int err;
 
-       err = create_filter_start(filter_str, set_str, &ps, &filter);
-       if (!err) {
-               err = replace_preds(call, filter, ps, false);
-               if (err && set_str)
-                       append_filter_err(ps, filter);
-       }
-       if (err && !set_str) {
-               free_event_filter(filter);
-               filter = NULL;
-       }
-       create_filter_finish(ps);
+       err = create_filter_start(filter_string, set_str, &pe, &filter);
+       if (err)
+               return err;
+
+       err = process_preds(call, filter_string, filter, pe);
+       if (err && set_str)
+               append_filter_err(pe, filter);
 
        *filterp = filter;
        return err;
@@ -1989,21 +1740,21 @@ static int create_system_filter(struct trace_subsystem_dir *dir,
                                char *filter_str, struct event_filter **filterp)
 {
        struct event_filter *filter = NULL;
-       struct filter_parse_state *ps = NULL;
+       struct filter_parse_error *pe = NULL;
        int err;
 
-       err = create_filter_start(filter_str, true, &ps, &filter);
+       err = create_filter_start(filter_str, true, &pe, &filter);
        if (!err) {
-               err = replace_system_preds(dir, tr, ps, filter_str);
+               err = process_system_preds(dir, tr, pe, filter_str);
                if (!err) {
                        /* System filters just show a default message */
                        kfree(filter->filter_string);
                        filter->filter_string = NULL;
                } else {
-                       append_filter_err(ps, filter);
+                       append_filter_err(pe, filter);
                }
        }
-       create_filter_finish(ps);
+       create_filter_finish(pe);
 
        *filterp = filter;
        return err;
@@ -2186,66 +1937,80 @@ static int __ftrace_function_set_filter(int filter, char *buf, int len,
        return ret;
 }
 
-static int ftrace_function_check_pred(struct filter_pred *pred, int leaf)
+static int ftrace_function_check_pred(struct filter_pred *pred)
 {
        struct ftrace_event_field *field = pred->field;
 
-       if (leaf) {
-               /*
-                * Check the leaf predicate for function trace, verify:
-                *  - only '==' and '!=' is used
-                *  - the 'ip' field is used
-                */
-               if ((pred->op != OP_EQ) && (pred->op != OP_NE))
-                       return -EINVAL;
+       /*
+        * Check the predicate for function trace, verify:
+        *  - only '==' and '!=' is used
+        *  - the 'ip' field is used
+        */
+       if ((pred->op != OP_EQ) && (pred->op != OP_NE))
+               return -EINVAL;
 
-               if (strcmp(field->name, "ip"))
-                       return -EINVAL;
-       } else {
-               /*
-                * Check the non leaf predicate for function trace, verify:
-                *  - only '||' is used
-               */
-               if (pred->op != OP_OR)
-                       return -EINVAL;
-       }
+       if (strcmp(field->name, "ip"))
+               return -EINVAL;
 
        return 0;
 }
 
-static int ftrace_function_set_filter_cb(enum move_type move,
-                                        struct filter_pred *pred,
-                                        int *err, void *data)
+static int ftrace_function_set_filter_pred(struct filter_pred *pred,
+                                          struct function_filter_data *data)
 {
+       int ret;
+
        /* Checking the node is valid for function trace. */
-       if ((move != MOVE_DOWN) ||
-           (pred->left != FILTER_PRED_INVALID)) {
-               *err = ftrace_function_check_pred(pred, 0);
-       } else {
-               *err = ftrace_function_check_pred(pred, 1);
-               if (*err)
-                       return WALK_PRED_ABORT;
-
-               *err = __ftrace_function_set_filter(pred->op == OP_EQ,
-                                                   pred->regex.pattern,
-                                                   pred->regex.len,
-                                                   data);
-       }
+       ret = ftrace_function_check_pred(pred);
+       if (ret)
+               return ret;
+
+       return __ftrace_function_set_filter(pred->op == OP_EQ,
+                                           pred->regex.pattern,
+                                           pred->regex.len,
+                                           data);
+}
+
+static bool is_or(struct prog_entry *prog, int i)
+{
+       int target;
+
+       /*
+        * Only "||" is allowed for function events, thus,
+        * all true branches should jump to true, and any
+        * false branch should jump to false.
+        */
+       target = prog[i].target + 1;
+       /* True and false have NULL preds (all prog entries should jump to one) */
+       if (prog[target].pred)
+               return false;
 
-       return (*err) ? WALK_PRED_ABORT : WALK_PRED_DEFAULT;
+       /* prog[target].target is 1 for TRUE, 0 for FALSE */
+       return prog[i].when_to_branch == prog[target].target;
 }
 
 static int ftrace_function_set_filter(struct perf_event *event,
                                      struct event_filter *filter)
 {
+       struct prog_entry *prog = rcu_dereference_protected(filter->prog,
+                                               lockdep_is_held(&event_mutex));
        struct function_filter_data data = {
                .first_filter  = 1,
                .first_notrace = 1,
                .ops           = &event->ftrace_ops,
        };
+       int i;
 
-       return walk_pred_tree(filter->preds, filter->root,
-                             ftrace_function_set_filter_cb, &data);
+       for (i = 0; prog[i].pred; i++) {
+               struct filter_pred *pred = prog[i].pred;
+
+               if (!is_or(prog, i))
+                       return -EINVAL;
+
+               if (ftrace_function_set_filter_pred(pred, &data) < 0)
+                       return -EINVAL;
+       }
+       return 0;
 }
 #else
 static int ftrace_function_set_filter(struct perf_event *event,
@@ -2388,26 +2153,28 @@ static int test_pred_visited_fn(struct filter_pred *pred, void *event)
        return 1;
 }
 
-static int test_walk_pred_cb(enum move_type move, struct filter_pred *pred,
-                            int *err, void *data)
+static void update_pred_fn(struct event_filter *filter, char *fields)
 {
-       char *fields = data;
+       struct prog_entry *prog = rcu_dereference_protected(filter->prog,
+                                               lockdep_is_held(&event_mutex));
+       int i;
 
-       if ((move == MOVE_DOWN) &&
-           (pred->left == FILTER_PRED_INVALID)) {
+       for (i = 0; prog[i].pred; i++) {
+               struct filter_pred *pred = prog[i].pred;
                struct ftrace_event_field *field = pred->field;
 
+               WARN_ON_ONCE(!pred->fn);
+
                if (!field) {
-                       WARN(1, "all leafs should have field defined");
-                       return WALK_PRED_DEFAULT;
+                       WARN_ONCE(1, "all leafs should have field defined %d", i);
+                       continue;
                }
+
                if (!strchr(fields, *field->name))
-                       return WALK_PRED_DEFAULT;
+                       continue;
 
-               WARN_ON(!pred->fn);
                pred->fn = test_pred_visited_fn;
        }
-       return WALK_PRED_DEFAULT;
 }
 
 static __init int ftrace_test_event_filter(void)
@@ -2431,20 +2198,22 @@ static __init int ftrace_test_event_filter(void)
                        break;
                }
 
+               /* Needed to dereference filter->prog */
+               mutex_lock(&event_mutex);
                /*
                 * The preemption disabling is not really needed for self
                 * tests, but the rcu dereference will complain without it.
                 */
                preempt_disable();
                if (*d->not_visited)
-                       walk_pred_tree(filter->preds, filter->root,
-                                      test_walk_pred_cb,
-                                      d->not_visited);
+                       update_pred_fn(filter, d->not_visited);
 
                test_pred_visited = 0;
                err = filter_match_preds(filter, &d->rec);
                preempt_enable();
 
+               mutex_unlock(&event_mutex);
+
                __free_filter(filter);
 
                if (test_pred_visited) {
index 1e1558c99d56090eea6a321c11d7790334fb0e24..0d7b3ffbecc2a03a7604509e2f19cdd3df5e4973 100644 (file)
 #include <linux/slab.h>
 #include <linux/stacktrace.h>
 #include <linux/rculist.h>
+#include <linux/tracefs.h>
 
 #include "tracing_map.h"
 #include "trace.h"
 
+#define SYNTH_SYSTEM           "synthetic"
+#define SYNTH_FIELDS_MAX       16
+
+#define STR_VAR_LEN_MAX                32 /* must be multiple of sizeof(u64) */
+
 struct hist_field;
 
-typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event);
+typedef u64 (*hist_field_fn_t) (struct hist_field *field,
+                               struct tracing_map_elt *elt,
+                               struct ring_buffer_event *rbe,
+                               void *event);
 
 #define HIST_FIELD_OPERANDS_MAX        2
+#define HIST_FIELDS_MAX                (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX)
+#define HIST_ACTIONS_MAX       8
+
+enum field_op_id {
+       FIELD_OP_NONE,
+       FIELD_OP_PLUS,
+       FIELD_OP_MINUS,
+       FIELD_OP_UNARY_MINUS,
+};
+
+struct hist_var {
+       char                            *name;
+       struct hist_trigger_data        *hist_data;
+       unsigned int                    idx;
+};
 
 struct hist_field {
        struct ftrace_event_field       *field;
@@ -37,27 +61,49 @@ struct hist_field {
        unsigned int                    size;
        unsigned int                    offset;
        unsigned int                    is_signed;
+       const char                      *type;
        struct hist_field               *operands[HIST_FIELD_OPERANDS_MAX];
+       struct hist_trigger_data        *hist_data;
+       struct hist_var                 var;
+       enum field_op_id                operator;
+       char                            *system;
+       char                            *event_name;
+       char                            *name;
+       unsigned int                    var_idx;
+       unsigned int                    var_ref_idx;
+       bool                            read_once;
 };
 
-static u64 hist_field_none(struct hist_field *field, void *event)
+static u64 hist_field_none(struct hist_field *field,
+                          struct tracing_map_elt *elt,
+                          struct ring_buffer_event *rbe,
+                          void *event)
 {
        return 0;
 }
 
-static u64 hist_field_counter(struct hist_field *field, void *event)
+static u64 hist_field_counter(struct hist_field *field,
+                             struct tracing_map_elt *elt,
+                             struct ring_buffer_event *rbe,
+                             void *event)
 {
        return 1;
 }
 
-static u64 hist_field_string(struct hist_field *hist_field, void *event)
+static u64 hist_field_string(struct hist_field *hist_field,
+                            struct tracing_map_elt *elt,
+                            struct ring_buffer_event *rbe,
+                            void *event)
 {
        char *addr = (char *)(event + hist_field->field->offset);
 
        return (u64)(unsigned long)addr;
 }
 
-static u64 hist_field_dynstring(struct hist_field *hist_field, void *event)
+static u64 hist_field_dynstring(struct hist_field *hist_field,
+                               struct tracing_map_elt *elt,
+                               struct ring_buffer_event *rbe,
+                               void *event)
 {
        u32 str_item = *(u32 *)(event + hist_field->field->offset);
        int str_loc = str_item & 0xffff;
@@ -66,24 +112,74 @@ static u64 hist_field_dynstring(struct hist_field *hist_field, void *event)
        return (u64)(unsigned long)addr;
 }
 
-static u64 hist_field_pstring(struct hist_field *hist_field, void *event)
+static u64 hist_field_pstring(struct hist_field *hist_field,
+                             struct tracing_map_elt *elt,
+                             struct ring_buffer_event *rbe,
+                             void *event)
 {
        char **addr = (char **)(event + hist_field->field->offset);
 
        return (u64)(unsigned long)*addr;
 }
 
-static u64 hist_field_log2(struct hist_field *hist_field, void *event)
+static u64 hist_field_log2(struct hist_field *hist_field,
+                          struct tracing_map_elt *elt,
+                          struct ring_buffer_event *rbe,
+                          void *event)
 {
        struct hist_field *operand = hist_field->operands[0];
 
-       u64 val = operand->fn(operand, event);
+       u64 val = operand->fn(operand, elt, rbe, event);
 
        return (u64) ilog2(roundup_pow_of_two(val));
 }
 
+static u64 hist_field_plus(struct hist_field *hist_field,
+                          struct tracing_map_elt *elt,
+                          struct ring_buffer_event *rbe,
+                          void *event)
+{
+       struct hist_field *operand1 = hist_field->operands[0];
+       struct hist_field *operand2 = hist_field->operands[1];
+
+       u64 val1 = operand1->fn(operand1, elt, rbe, event);
+       u64 val2 = operand2->fn(operand2, elt, rbe, event);
+
+       return val1 + val2;
+}
+
+static u64 hist_field_minus(struct hist_field *hist_field,
+                           struct tracing_map_elt *elt,
+                           struct ring_buffer_event *rbe,
+                           void *event)
+{
+       struct hist_field *operand1 = hist_field->operands[0];
+       struct hist_field *operand2 = hist_field->operands[1];
+
+       u64 val1 = operand1->fn(operand1, elt, rbe, event);
+       u64 val2 = operand2->fn(operand2, elt, rbe, event);
+
+       return val1 - val2;
+}
+
+static u64 hist_field_unary_minus(struct hist_field *hist_field,
+                                 struct tracing_map_elt *elt,
+                                 struct ring_buffer_event *rbe,
+                                 void *event)
+{
+       struct hist_field *operand = hist_field->operands[0];
+
+       s64 sval = (s64)operand->fn(operand, elt, rbe, event);
+       u64 val = (u64)-sval;
+
+       return val;
+}
+
 #define DEFINE_HIST_FIELD_FN(type)                                     \
-static u64 hist_field_##type(struct hist_field *hist_field, void *event)\
+       static u64 hist_field_##type(struct hist_field *hist_field,     \
+                                    struct tracing_map_elt *elt,       \
+                                    struct ring_buffer_event *rbe,     \
+                                    void *event)                       \
 {                                                                      \
        type *addr = (type *)(event + hist_field->field->offset);       \
                                                                        \
@@ -126,6 +222,19 @@ enum hist_field_flags {
        HIST_FIELD_FL_SYSCALL           = 1 << 7,
        HIST_FIELD_FL_STACKTRACE        = 1 << 8,
        HIST_FIELD_FL_LOG2              = 1 << 9,
+       HIST_FIELD_FL_TIMESTAMP         = 1 << 10,
+       HIST_FIELD_FL_TIMESTAMP_USECS   = 1 << 11,
+       HIST_FIELD_FL_VAR               = 1 << 12,
+       HIST_FIELD_FL_EXPR              = 1 << 13,
+       HIST_FIELD_FL_VAR_REF           = 1 << 14,
+       HIST_FIELD_FL_CPU               = 1 << 15,
+       HIST_FIELD_FL_ALIAS             = 1 << 16,
+};
+
+struct var_defs {
+       unsigned int    n_vars;
+       char            *name[TRACING_MAP_VARS_MAX];
+       char            *expr[TRACING_MAP_VARS_MAX];
 };
 
 struct hist_trigger_attrs {
@@ -133,332 +242,3583 @@ struct hist_trigger_attrs {
        char            *vals_str;
        char            *sort_key_str;
        char            *name;
+       char            *clock;
        bool            pause;
        bool            cont;
        bool            clear;
+       bool            ts_in_usecs;
        unsigned int    map_bits;
+
+       char            *assignment_str[TRACING_MAP_VARS_MAX];
+       unsigned int    n_assignments;
+
+       char            *action_str[HIST_ACTIONS_MAX];
+       unsigned int    n_actions;
+
+       struct var_defs var_defs;
+};
+
+struct field_var {
+       struct hist_field       *var;
+       struct hist_field       *val;
+};
+
+struct field_var_hist {
+       struct hist_trigger_data        *hist_data;
+       char                            *cmd;
 };
 
 struct hist_trigger_data {
-       struct hist_field               *fields[TRACING_MAP_FIELDS_MAX];
+       struct hist_field               *fields[HIST_FIELDS_MAX];
        unsigned int                    n_vals;
        unsigned int                    n_keys;
        unsigned int                    n_fields;
+       unsigned int                    n_vars;
        unsigned int                    key_size;
        struct tracing_map_sort_key     sort_keys[TRACING_MAP_SORT_KEYS_MAX];
        unsigned int                    n_sort_keys;
        struct trace_event_file         *event_file;
        struct hist_trigger_attrs       *attrs;
        struct tracing_map              *map;
+       bool                            enable_timestamps;
+       bool                            remove;
+       struct hist_field               *var_refs[TRACING_MAP_VARS_MAX];
+       unsigned int                    n_var_refs;
+
+       struct action_data              *actions[HIST_ACTIONS_MAX];
+       unsigned int                    n_actions;
+
+       struct hist_field               *synth_var_refs[SYNTH_FIELDS_MAX];
+       unsigned int                    n_synth_var_refs;
+       struct field_var                *field_vars[SYNTH_FIELDS_MAX];
+       unsigned int                    n_field_vars;
+       unsigned int                    n_field_var_str;
+       struct field_var_hist           *field_var_hists[SYNTH_FIELDS_MAX];
+       unsigned int                    n_field_var_hists;
+
+       struct field_var                *max_vars[SYNTH_FIELDS_MAX];
+       unsigned int                    n_max_vars;
+       unsigned int                    n_max_var_str;
 };
 
-static const char *hist_field_name(struct hist_field *field,
-                                  unsigned int level)
-{
-       const char *field_name = "";
+struct synth_field {
+       char *type;
+       char *name;
+       size_t size;
+       bool is_signed;
+       bool is_string;
+};
 
-       if (level > 1)
-               return field_name;
+struct synth_event {
+       struct list_head                        list;
+       int                                     ref;
+       char                                    *name;
+       struct synth_field                      **fields;
+       unsigned int                            n_fields;
+       unsigned int                            n_u64;
+       struct trace_event_class                class;
+       struct trace_event_call                 call;
+       struct tracepoint                       *tp;
+};
 
-       if (field->field)
-               field_name = field->field->name;
-       else if (field->flags & HIST_FIELD_FL_LOG2)
-               field_name = hist_field_name(field->operands[0], ++level);
+struct action_data;
+
+typedef void (*action_fn_t) (struct hist_trigger_data *hist_data,
+                            struct tracing_map_elt *elt, void *rec,
+                            struct ring_buffer_event *rbe,
+                            struct action_data *data, u64 *var_ref_vals);
+
+struct action_data {
+       action_fn_t             fn;
+       unsigned int            n_params;
+       char                    *params[SYNTH_FIELDS_MAX];
+
+       union {
+               struct {
+                       unsigned int            var_ref_idx;
+                       char                    *match_event;
+                       char                    *match_event_system;
+                       char                    *synth_event_name;
+                       struct synth_event      *synth_event;
+               } onmatch;
+
+               struct {
+                       char                    *var_str;
+                       char                    *fn_name;
+                       unsigned int            max_var_ref_idx;
+                       struct hist_field       *max_var;
+                       struct hist_field       *var;
+               } onmax;
+       };
+};
 
-       if (field_name == NULL)
-               field_name = "";
 
-       return field_name;
+static char last_hist_cmd[MAX_FILTER_STR_VAL];
+static char hist_err_str[MAX_FILTER_STR_VAL];
+
+static void last_cmd_set(char *str)
+{
+       if (!str)
+               return;
+
+       strncpy(last_hist_cmd, str, MAX_FILTER_STR_VAL - 1);
 }
 
-static hist_field_fn_t select_value_fn(int field_size, int field_is_signed)
+static void hist_err(char *str, char *var)
 {
-       hist_field_fn_t fn = NULL;
+       int maxlen = MAX_FILTER_STR_VAL - 1;
 
-       switch (field_size) {
-       case 8:
-               if (field_is_signed)
-                       fn = hist_field_s64;
-               else
-                       fn = hist_field_u64;
-               break;
-       case 4:
-               if (field_is_signed)
-                       fn = hist_field_s32;
-               else
-                       fn = hist_field_u32;
-               break;
-       case 2:
-               if (field_is_signed)
-                       fn = hist_field_s16;
-               else
-                       fn = hist_field_u16;
-               break;
-       case 1:
-               if (field_is_signed)
-                       fn = hist_field_s8;
-               else
-                       fn = hist_field_u8;
-               break;
-       }
+       if (!str)
+               return;
 
-       return fn;
+       if (strlen(hist_err_str))
+               return;
+
+       if (!var)
+               var = "";
+
+       if (strlen(hist_err_str) + strlen(str) + strlen(var) > maxlen)
+               return;
+
+       strcat(hist_err_str, str);
+       strcat(hist_err_str, var);
 }
 
-static int parse_map_size(char *str)
+static void hist_err_event(char *str, char *system, char *event, char *var)
 {
-       unsigned long size, map_bits;
-       int ret;
+       char err[MAX_FILTER_STR_VAL];
 
-       strsep(&str, "=");
-       if (!str) {
-               ret = -EINVAL;
-               goto out;
-       }
+       if (system && var)
+               snprintf(err, MAX_FILTER_STR_VAL, "%s.%s.%s", system, event, var);
+       else if (system)
+               snprintf(err, MAX_FILTER_STR_VAL, "%s.%s", system, event);
+       else
+               strncpy(err, var, MAX_FILTER_STR_VAL);
 
-       ret = kstrtoul(str, 0, &size);
-       if (ret)
-               goto out;
+       hist_err(str, err);
+}
 
-       map_bits = ilog2(roundup_pow_of_two(size));
-       if (map_bits < TRACING_MAP_BITS_MIN ||
-           map_bits > TRACING_MAP_BITS_MAX)
-               ret = -EINVAL;
-       else
-               ret = map_bits;
- out:
-       return ret;
+static void hist_err_clear(void)
+{
+       hist_err_str[0] = '\0';
 }
 
-static void destroy_hist_trigger_attrs(struct hist_trigger_attrs *attrs)
+static bool have_hist_err(void)
 {
-       if (!attrs)
-               return;
+       if (strlen(hist_err_str))
+               return true;
 
-       kfree(attrs->name);
-       kfree(attrs->sort_key_str);
-       kfree(attrs->keys_str);
-       kfree(attrs->vals_str);
-       kfree(attrs);
+       return false;
 }
 
-static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str)
-{
-       struct hist_trigger_attrs *attrs;
-       int ret = 0;
+static LIST_HEAD(synth_event_list);
+static DEFINE_MUTEX(synth_event_mutex);
 
-       attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
-       if (!attrs)
-               return ERR_PTR(-ENOMEM);
+struct synth_trace_event {
+       struct trace_entry      ent;
+       u64                     fields[];
+};
 
-       while (trigger_str) {
-               char *str = strsep(&trigger_str, ":");
+static int synth_event_define_fields(struct trace_event_call *call)
+{
+       struct synth_trace_event trace;
+       int offset = offsetof(typeof(trace), fields);
+       struct synth_event *event = call->data;
+       unsigned int i, size, n_u64;
+       char *name, *type;
+       bool is_signed;
+       int ret = 0;
 
-               if ((strncmp(str, "key=", strlen("key=")) == 0) ||
-                   (strncmp(str, "keys=", strlen("keys=")) == 0))
-                       attrs->keys_str = kstrdup(str, GFP_KERNEL);
-               else if ((strncmp(str, "val=", strlen("val=")) == 0) ||
-                        (strncmp(str, "vals=", strlen("vals=")) == 0) ||
-                        (strncmp(str, "values=", strlen("values=")) == 0))
-                       attrs->vals_str = kstrdup(str, GFP_KERNEL);
-               else if (strncmp(str, "sort=", strlen("sort=")) == 0)
-                       attrs->sort_key_str = kstrdup(str, GFP_KERNEL);
-               else if (strncmp(str, "name=", strlen("name=")) == 0)
-                       attrs->name = kstrdup(str, GFP_KERNEL);
-               else if (strcmp(str, "pause") == 0)
-                       attrs->pause = true;
-               else if ((strcmp(str, "cont") == 0) ||
-                        (strcmp(str, "continue") == 0))
-                       attrs->cont = true;
-               else if (strcmp(str, "clear") == 0)
-                       attrs->clear = true;
-               else if (strncmp(str, "size=", strlen("size=")) == 0) {
-                       int map_bits = parse_map_size(str);
+       for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
+               size = event->fields[i]->size;
+               is_signed = event->fields[i]->is_signed;
+               type = event->fields[i]->type;
+               name = event->fields[i]->name;
+               ret = trace_define_field(call, type, name, offset, size,
+                                        is_signed, FILTER_OTHER);
+               if (ret)
+                       break;
 
-                       if (map_bits < 0) {
-                               ret = map_bits;
-                               goto free;
-                       }
-                       attrs->map_bits = map_bits;
+               if (event->fields[i]->is_string) {
+                       offset += STR_VAR_LEN_MAX;
+                       n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
                } else {
-                       ret = -EINVAL;
-                       goto free;
+                       offset += sizeof(u64);
+                       n_u64++;
                }
        }
 
-       if (!attrs->keys_str) {
-               ret = -EINVAL;
-               goto free;
-       }
+       event->n_u64 = n_u64;
 
-       return attrs;
- free:
-       destroy_hist_trigger_attrs(attrs);
+       return ret;
+}
 
-       return ERR_PTR(ret);
+static bool synth_field_signed(char *type)
+{
+       if (strncmp(type, "u", 1) == 0)
+               return false;
+
+       return true;
 }
 
-static inline void save_comm(char *comm, struct task_struct *task)
+static int synth_field_is_string(char *type)
 {
-       if (!task->pid) {
-               strcpy(comm, "<idle>");
-               return;
-       }
+       if (strstr(type, "char[") != NULL)
+               return true;
 
-       if (WARN_ON_ONCE(task->pid < 0)) {
-               strcpy(comm, "<XXX>");
-               return;
-       }
+       return false;
+}
 
-       memcpy(comm, task->comm, TASK_COMM_LEN);
+static int synth_field_string_size(char *type)
+{
+       char buf[4], *end, *start;
+       unsigned int len;
+       int size, err;
+
+       start = strstr(type, "char[");
+       if (start == NULL)
+               return -EINVAL;
+       start += strlen("char[");
+
+       end = strchr(type, ']');
+       if (!end || end < start)
+               return -EINVAL;
+
+       len = end - start;
+       if (len > 3)
+               return -EINVAL;
+
+       strncpy(buf, start, len);
+       buf[len] = '\0';
+
+       err = kstrtouint(buf, 0, &size);
+       if (err)
+               return err;
+
+       if (size > STR_VAR_LEN_MAX)
+               return -EINVAL;
+
+       return size;
 }
 
-static void hist_trigger_elt_comm_free(struct tracing_map_elt *elt)
+static int synth_field_size(char *type)
 {
-       kfree((char *)elt->private_data);
+       int size = 0;
+
+       if (strcmp(type, "s64") == 0)
+               size = sizeof(s64);
+       else if (strcmp(type, "u64") == 0)
+               size = sizeof(u64);
+       else if (strcmp(type, "s32") == 0)
+               size = sizeof(s32);
+       else if (strcmp(type, "u32") == 0)
+               size = sizeof(u32);
+       else if (strcmp(type, "s16") == 0)
+               size = sizeof(s16);
+       else if (strcmp(type, "u16") == 0)
+               size = sizeof(u16);
+       else if (strcmp(type, "s8") == 0)
+               size = sizeof(s8);
+       else if (strcmp(type, "u8") == 0)
+               size = sizeof(u8);
+       else if (strcmp(type, "char") == 0)
+               size = sizeof(char);
+       else if (strcmp(type, "unsigned char") == 0)
+               size = sizeof(unsigned char);
+       else if (strcmp(type, "int") == 0)
+               size = sizeof(int);
+       else if (strcmp(type, "unsigned int") == 0)
+               size = sizeof(unsigned int);
+       else if (strcmp(type, "long") == 0)
+               size = sizeof(long);
+       else if (strcmp(type, "unsigned long") == 0)
+               size = sizeof(unsigned long);
+       else if (strcmp(type, "pid_t") == 0)
+               size = sizeof(pid_t);
+       else if (synth_field_is_string(type))
+               size = synth_field_string_size(type);
+
+       return size;
 }
 
-static int hist_trigger_elt_comm_alloc(struct tracing_map_elt *elt)
+static const char *synth_field_fmt(char *type)
 {
-       struct hist_trigger_data *hist_data = elt->map->private_data;
-       struct hist_field *key_field;
-       unsigned int i;
+       const char *fmt = "%llu";
+
+       if (strcmp(type, "s64") == 0)
+               fmt = "%lld";
+       else if (strcmp(type, "u64") == 0)
+               fmt = "%llu";
+       else if (strcmp(type, "s32") == 0)
+               fmt = "%d";
+       else if (strcmp(type, "u32") == 0)
+               fmt = "%u";
+       else if (strcmp(type, "s16") == 0)
+               fmt = "%d";
+       else if (strcmp(type, "u16") == 0)
+               fmt = "%u";
+       else if (strcmp(type, "s8") == 0)
+               fmt = "%d";
+       else if (strcmp(type, "u8") == 0)
+               fmt = "%u";
+       else if (strcmp(type, "char") == 0)
+               fmt = "%d";
+       else if (strcmp(type, "unsigned char") == 0)
+               fmt = "%u";
+       else if (strcmp(type, "int") == 0)
+               fmt = "%d";
+       else if (strcmp(type, "unsigned int") == 0)
+               fmt = "%u";
+       else if (strcmp(type, "long") == 0)
+               fmt = "%ld";
+       else if (strcmp(type, "unsigned long") == 0)
+               fmt = "%lu";
+       else if (strcmp(type, "pid_t") == 0)
+               fmt = "%d";
+       else if (synth_field_is_string(type))
+               fmt = "%s";
+
+       return fmt;
+}
 
-       for_each_hist_key_field(i, hist_data) {
-               key_field = hist_data->fields[i];
+static enum print_line_t print_synth_event(struct trace_iterator *iter,
+                                          int flags,
+                                          struct trace_event *event)
+{
+       struct trace_array *tr = iter->tr;
+       struct trace_seq *s = &iter->seq;
+       struct synth_trace_event *entry;
+       struct synth_event *se;
+       unsigned int i, n_u64;
+       char print_fmt[32];
+       const char *fmt;
 
-               if (key_field->flags & HIST_FIELD_FL_EXECNAME) {
-                       unsigned int size = TASK_COMM_LEN + 1;
+       entry = (struct synth_trace_event *)iter->ent;
+       se = container_of(event, struct synth_event, call.event);
 
-                       elt->private_data = kzalloc(size, GFP_KERNEL);
-                       if (!elt->private_data)
-                               return -ENOMEM;
-                       break;
+       trace_seq_printf(s, "%s: ", se->name);
+
+       for (i = 0, n_u64 = 0; i < se->n_fields; i++) {
+               if (trace_seq_has_overflowed(s))
+                       goto end;
+
+               fmt = synth_field_fmt(se->fields[i]->type);
+
+               /* parameter types */
+               if (tr->trace_flags & TRACE_ITER_VERBOSE)
+                       trace_seq_printf(s, "%s ", fmt);
+
+               snprintf(print_fmt, sizeof(print_fmt), "%%s=%s%%s", fmt);
+
+               /* parameter values */
+               if (se->fields[i]->is_string) {
+                       trace_seq_printf(s, print_fmt, se->fields[i]->name,
+                                        (char *)&entry->fields[n_u64],
+                                        i == se->n_fields - 1 ? "" : " ");
+                       n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+               } else {
+                       trace_seq_printf(s, print_fmt, se->fields[i]->name,
+                                        entry->fields[n_u64],
+                                        i == se->n_fields - 1 ? "" : " ");
+                       n_u64++;
                }
        }
+end:
+       trace_seq_putc(s, '\n');
 
-       return 0;
+       return trace_handle_return(s);
 }
 
-static void hist_trigger_elt_comm_copy(struct tracing_map_elt *to,
-                                      struct tracing_map_elt *from)
+static struct trace_event_functions synth_event_funcs = {
+       .trace          = print_synth_event
+};
+
+static notrace void trace_event_raw_event_synth(void *__data,
+                                               u64 *var_ref_vals,
+                                               unsigned int var_ref_idx)
 {
-       char *comm_from = from->private_data;
-       char *comm_to = to->private_data;
+       struct trace_event_file *trace_file = __data;
+       struct synth_trace_event *entry;
+       struct trace_event_buffer fbuffer;
+       struct ring_buffer *buffer;
+       struct synth_event *event;
+       unsigned int i, n_u64;
+       int fields_size = 0;
+
+       event = trace_file->event_call->data;
+
+       if (trace_trigger_soft_disabled(trace_file))
+               return;
+
+       fields_size = event->n_u64 * sizeof(u64);
+
+       /*
+        * Avoid ring buffer recursion detection, as this event
+        * is being performed within another event.
+        */
+       buffer = trace_file->tr->trace_buffer.buffer;
+       ring_buffer_nest_start(buffer);
+
+       entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+                                          sizeof(*entry) + fields_size);
+       if (!entry)
+               goto out;
+
+       for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
+               if (event->fields[i]->is_string) {
+                       char *str_val = (char *)(long)var_ref_vals[var_ref_idx + i];
+                       char *str_field = (char *)&entry->fields[n_u64];
+
+                       strscpy(str_field, str_val, STR_VAR_LEN_MAX);
+                       n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+               } else {
+                       entry->fields[n_u64] = var_ref_vals[var_ref_idx + i];
+                       n_u64++;
+               }
+       }
 
-       if (comm_from)
-               memcpy(comm_to, comm_from, TASK_COMM_LEN + 1);
+       trace_event_buffer_commit(&fbuffer);
+out:
+       ring_buffer_nest_end(buffer);
 }
 
-static void hist_trigger_elt_comm_init(struct tracing_map_elt *elt)
+static void free_synth_event_print_fmt(struct trace_event_call *call)
 {
-       char *comm = elt->private_data;
-
-       if (comm)
-               save_comm(comm, current);
+       if (call) {
+               kfree(call->print_fmt);
+               call->print_fmt = NULL;
+       }
 }
 
-static const struct tracing_map_ops hist_trigger_elt_comm_ops = {
-       .elt_alloc      = hist_trigger_elt_comm_alloc,
-       .elt_copy       = hist_trigger_elt_comm_copy,
-       .elt_free       = hist_trigger_elt_comm_free,
-       .elt_init       = hist_trigger_elt_comm_init,
-};
+static int __set_synth_event_print_fmt(struct synth_event *event,
+                                      char *buf, int len)
+{
+       const char *fmt;
+       int pos = 0;
+       int i;
+
+       /* When len=0, we just calculate the needed length */
+#define LEN_OR_ZERO (len ? len - pos : 0)
+
+       pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
+       for (i = 0; i < event->n_fields; i++) {
+               fmt = synth_field_fmt(event->fields[i]->type);
+               pos += snprintf(buf + pos, LEN_OR_ZERO, "%s=%s%s",
+                               event->fields[i]->name, fmt,
+                               i == event->n_fields - 1 ? "" : ", ");
+       }
+       pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
 
-static void destroy_hist_field(struct hist_field *hist_field,
-                              unsigned int level)
+       for (i = 0; i < event->n_fields; i++) {
+               pos += snprintf(buf + pos, LEN_OR_ZERO,
+                               ", REC->%s", event->fields[i]->name);
+       }
+
+#undef LEN_OR_ZERO
+
+       /* return the length of print_fmt */
+       return pos;
+}
+
+static int set_synth_event_print_fmt(struct trace_event_call *call)
 {
-       unsigned int i;
+       struct synth_event *event = call->data;
+       char *print_fmt;
+       int len;
 
-       if (level > 2)
-               return;
+       /* First: called with 0 length to calculate the needed length */
+       len = __set_synth_event_print_fmt(event, NULL, 0);
 
-       if (!hist_field)
-               return;
+       print_fmt = kmalloc(len + 1, GFP_KERNEL);
+       if (!print_fmt)
+               return -ENOMEM;
 
-       for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++)
-               destroy_hist_field(hist_field->operands[i], level + 1);
+       /* Second: actually write the @print_fmt */
+       __set_synth_event_print_fmt(event, print_fmt, len + 1);
+       call->print_fmt = print_fmt;
 
-       kfree(hist_field);
+       return 0;
 }
 
-static struct hist_field *create_hist_field(struct ftrace_event_field *field,
-                                           unsigned long flags)
+static void free_synth_field(struct synth_field *field)
 {
-       struct hist_field *hist_field;
+       kfree(field->type);
+       kfree(field->name);
+       kfree(field);
+}
 
-       if (field && is_function_field(field))
-               return NULL;
+static struct synth_field *parse_synth_field(char *field_type,
+                                            char *field_name)
+{
+       struct synth_field *field;
+       int len, ret = 0;
+       char *array;
 
-       hist_field = kzalloc(sizeof(struct hist_field), GFP_KERNEL);
-       if (!hist_field)
-               return NULL;
+       if (field_type[0] == ';')
+               field_type++;
 
-       if (flags & HIST_FIELD_FL_HITCOUNT) {
-               hist_field->fn = hist_field_counter;
-               goto out;
-       }
+       len = strlen(field_name);
+       if (field_name[len - 1] == ';')
+               field_name[len - 1] = '\0';
 
-       if (flags & HIST_FIELD_FL_STACKTRACE) {
-               hist_field->fn = hist_field_none;
-               goto out;
+       field = kzalloc(sizeof(*field), GFP_KERNEL);
+       if (!field)
+               return ERR_PTR(-ENOMEM);
+
+       len = strlen(field_type) + 1;
+       array = strchr(field_name, '[');
+       if (array)
+               len += strlen(array);
+       field->type = kzalloc(len, GFP_KERNEL);
+       if (!field->type) {
+               ret = -ENOMEM;
+               goto free;
+       }
+       strcat(field->type, field_type);
+       if (array) {
+               strcat(field->type, array);
+               *array = '\0';
        }
 
-       if (flags & HIST_FIELD_FL_LOG2) {
+       field->size = synth_field_size(field->type);
+       if (!field->size) {
+               ret = -EINVAL;
+               goto free;
+       }
+
+       if (synth_field_is_string(field->type))
+               field->is_string = true;
+
+       field->is_signed = synth_field_signed(field->type);
+
+       field->name = kstrdup(field_name, GFP_KERNEL);
+       if (!field->name) {
+               ret = -ENOMEM;
+               goto free;
+       }
+ out:
+       return field;
+ free:
+       free_synth_field(field);
+       field = ERR_PTR(ret);
+       goto out;
+}
+
+static void free_synth_tracepoint(struct tracepoint *tp)
+{
+       if (!tp)
+               return;
+
+       kfree(tp->name);
+       kfree(tp);
+}
+
+static struct tracepoint *alloc_synth_tracepoint(char *name)
+{
+       struct tracepoint *tp;
+
+       tp = kzalloc(sizeof(*tp), GFP_KERNEL);
+       if (!tp)
+               return ERR_PTR(-ENOMEM);
+
+       tp->name = kstrdup(name, GFP_KERNEL);
+       if (!tp->name) {
+               kfree(tp);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       return tp;
+}
+
+typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals,
+                                   unsigned int var_ref_idx);
+
+static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals,
+                              unsigned int var_ref_idx)
+{
+       struct tracepoint *tp = event->tp;
+
+       if (unlikely(atomic_read(&tp->key.enabled) > 0)) {
+               struct tracepoint_func *probe_func_ptr;
+               synth_probe_func_t probe_func;
+               void *__data;
+
+               if (!(cpu_online(raw_smp_processor_id())))
+                       return;
+
+               probe_func_ptr = rcu_dereference_sched((tp)->funcs);
+               if (probe_func_ptr) {
+                       do {
+                               probe_func = probe_func_ptr->func;
+                               __data = probe_func_ptr->data;
+                               probe_func(__data, var_ref_vals, var_ref_idx);
+                       } while ((++probe_func_ptr)->func);
+               }
+       }
+}
+
+static struct synth_event *find_synth_event(const char *name)
+{
+       struct synth_event *event;
+
+       list_for_each_entry(event, &synth_event_list, list) {
+               if (strcmp(event->name, name) == 0)
+                       return event;
+       }
+
+       return NULL;
+}
+
+static int register_synth_event(struct synth_event *event)
+{
+       struct trace_event_call *call = &event->call;
+       int ret = 0;
+
+       event->call.class = &event->class;
+       event->class.system = kstrdup(SYNTH_SYSTEM, GFP_KERNEL);
+       if (!event->class.system) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       event->tp = alloc_synth_tracepoint(event->name);
+       if (IS_ERR(event->tp)) {
+               ret = PTR_ERR(event->tp);
+               event->tp = NULL;
+               goto out;
+       }
+
+       INIT_LIST_HEAD(&call->class->fields);
+       call->event.funcs = &synth_event_funcs;
+       call->class->define_fields = synth_event_define_fields;
+
+       ret = register_trace_event(&call->event);
+       if (!ret) {
+               ret = -ENODEV;
+               goto out;
+       }
+       call->flags = TRACE_EVENT_FL_TRACEPOINT;
+       call->class->reg = trace_event_reg;
+       call->class->probe = trace_event_raw_event_synth;
+       call->data = event;
+       call->tp = event->tp;
+
+       ret = trace_add_event_call(call);
+       if (ret) {
+               pr_warn("Failed to register synthetic event: %s\n",
+                       trace_event_name(call));
+               goto err;
+       }
+
+       ret = set_synth_event_print_fmt(call);
+       if (ret < 0) {
+               trace_remove_event_call(call);
+               goto err;
+       }
+ out:
+       return ret;
+ err:
+       unregister_trace_event(&call->event);
+       goto out;
+}
+
+static int unregister_synth_event(struct synth_event *event)
+{
+       struct trace_event_call *call = &event->call;
+       int ret;
+
+       ret = trace_remove_event_call(call);
+
+       return ret;
+}
+
+static void free_synth_event(struct synth_event *event)
+{
+       unsigned int i;
+
+       if (!event)
+               return;
+
+       for (i = 0; i < event->n_fields; i++)
+               free_synth_field(event->fields[i]);
+
+       kfree(event->fields);
+       kfree(event->name);
+       kfree(event->class.system);
+       free_synth_tracepoint(event->tp);
+       free_synth_event_print_fmt(&event->call);
+       kfree(event);
+}
+
+static struct synth_event *alloc_synth_event(char *event_name, int n_fields,
+                                            struct synth_field **fields)
+{
+       struct synth_event *event;
+       unsigned int i;
+
+       event = kzalloc(sizeof(*event), GFP_KERNEL);
+       if (!event) {
+               event = ERR_PTR(-ENOMEM);
+               goto out;
+       }
+
+       event->name = kstrdup(event_name, GFP_KERNEL);
+       if (!event->name) {
+               kfree(event);
+               event = ERR_PTR(-ENOMEM);
+               goto out;
+       }
+
+       event->fields = kcalloc(n_fields, sizeof(*event->fields), GFP_KERNEL);
+       if (!event->fields) {
+               free_synth_event(event);
+               event = ERR_PTR(-ENOMEM);
+               goto out;
+       }
+
+       for (i = 0; i < n_fields; i++)
+               event->fields[i] = fields[i];
+
+       event->n_fields = n_fields;
+ out:
+       return event;
+}
+
+static void action_trace(struct hist_trigger_data *hist_data,
+                        struct tracing_map_elt *elt, void *rec,
+                        struct ring_buffer_event *rbe,
+                        struct action_data *data, u64 *var_ref_vals)
+{
+       struct synth_event *event = data->onmatch.synth_event;
+
+       trace_synth(event, var_ref_vals, data->onmatch.var_ref_idx);
+}
+
+struct hist_var_data {
+       struct list_head list;
+       struct hist_trigger_data *hist_data;
+};
+
+static void add_or_delete_synth_event(struct synth_event *event, int delete)
+{
+       if (delete)
+               free_synth_event(event);
+       else {
+               mutex_lock(&synth_event_mutex);
+               if (!find_synth_event(event->name))
+                       list_add(&event->list, &synth_event_list);
+               else
+                       free_synth_event(event);
+               mutex_unlock(&synth_event_mutex);
+       }
+}
+
+/*
+ * Parse one line written to the synthetic_events file and create or
+ * delete a synthetic event accordingly.
+ *
+ * Returns 0 on success; -EEXIST if the event already exists, -EBUSY if
+ * a deletion target is still in use, -EINVAL on syntax errors, or a
+ * negative errno from field parsing / allocation.
+ *
+ * Registration with the trace-event core happens only after
+ * synth_event_mutex is dropped; add_or_delete_synth_event() then either
+ * links the new event into synth_event_list or frees it.
+ */
+static int create_synth_event(int argc, char **argv)
+{
+       struct synth_field *field, *fields[SYNTH_FIELDS_MAX];
+       struct synth_event *event = NULL;
+       bool delete_event = false;
+       int i, n_fields = 0, ret = 0;
+       char *name;
+
+       mutex_lock(&synth_event_mutex);
+
+       /*
+        * Argument syntax:
+        *  - Add synthetic event: <event_name> field[;field] ...
+        *  - Remove synthetic event: !<event_name> field[;field] ...
+        *      where 'field' = type field_name
+        */
+       if (argc < 1) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       name = argv[0];
+       if (name[0] == '!') {
+               delete_event = true;
+               name++;
+       }
+
+       event = find_synth_event(name);
+       if (event) {
+               if (delete_event) {
+                       /* still referenced by a trigger: refuse to delete */
+                       if (event->ref) {
+                               event = NULL;
+                               ret = -EBUSY;
+                               goto out;
+                       }
+                       /* unlink now; unregister/free after unlock below */
+                       list_del(&event->list);
+                       goto out;
+               }
+               event = NULL;
+               ret = -EEXIST;
+               goto out;
+       } else if (delete_event)
+               goto out;
+
+       if (argc < 2) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       /* parse "type name" pairs, allowing ';' tokens as separators */
+       for (i = 1; i < argc - 1; i++) {
+               if (strcmp(argv[i], ";") == 0)
+                       continue;
+               if (n_fields == SYNTH_FIELDS_MAX) {
+                       ret = -EINVAL;
+                       goto err;
+               }
+
+               field = parse_synth_field(argv[i], argv[i + 1]);
+               if (IS_ERR(field)) {
+                       ret = PTR_ERR(field);
+                       goto err;
+               }
+               fields[n_fields] = field;
+               i++; n_fields++;
+       }
+
+       /* a trailing unpaired token means a malformed field list */
+       if (i < argc) {
+               ret = -EINVAL;
+               goto err;
+       }
+
+       event = alloc_synth_event(name, n_fields, fields);
+       if (IS_ERR(event)) {
+               ret = PTR_ERR(event);
+               event = NULL;
+               goto err;
+       }
+ out:
+       mutex_unlock(&synth_event_mutex);
+
+       /* register/unregister outside the mutex, then list or free */
+       if (event) {
+               if (delete_event) {
+                       ret = unregister_synth_event(event);
+                       add_or_delete_synth_event(event, !ret);
+               } else {
+                       ret = register_synth_event(event);
+                       add_or_delete_synth_event(event, ret);
+               }
+       }
+
+       return ret;
+ err:
+       mutex_unlock(&synth_event_mutex);
+
+       for (i = 0; i < n_fields; i++)
+               free_synth_field(fields[i]);
+       free_synth_event(event);
+
+       return ret;
+}
+
+/*
+ * Remove and unregister every synthetic event, failing with -EBUSY if
+ * any event is still referenced.  Events are spliced onto a private
+ * list under the mutex, then unregistered outside it.
+ */
+static int release_all_synth_events(void)
+{
+       struct list_head release_events;
+       struct synth_event *event, *e;
+       int ret = 0;
+
+       INIT_LIST_HEAD(&release_events);
+
+       mutex_lock(&synth_event_mutex);
+
+       list_for_each_entry(event, &synth_event_list, list) {
+               if (event->ref) {
+                       mutex_unlock(&synth_event_mutex);
+                       return -EBUSY;
+               }
+       }
+
+       /*
+        * Name the list head explicitly instead of using the loop-exit
+        * value of 'event': &event->list equals &synth_event_list there
+        * only by virtue of list_for_each_entry()'s termination test,
+        * which is fragile to read and to modify.
+        */
+       list_splice_init(&synth_event_list, &release_events);
+
+       mutex_unlock(&synth_event_mutex);
+
+       list_for_each_entry_safe(event, e, &release_events, list) {
+               list_del(&event->list);
+
+               ret = unregister_synth_event(event);
+               add_or_delete_synth_event(event, !ret);
+       }
+
+       return ret;
+}
+
+
+/* seq_file ->start: take the mutex (released in ->stop) and position. */
+static void *synth_events_seq_start(struct seq_file *m, loff_t *pos)
+{
+       mutex_lock(&synth_event_mutex);
+
+       return seq_list_start(&synth_event_list, *pos);
+}
+
+/* seq_file ->next: advance to the next synthetic event on the list. */
+static void *synth_events_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       return seq_list_next(v, &synth_event_list, pos);
+}
+
+/* seq_file ->stop: drop the mutex taken in ->start. */
+static void synth_events_seq_stop(struct seq_file *m, void *v)
+{
+       mutex_unlock(&synth_event_mutex);
+}
+
+/* seq_file ->show: print one event as "name<TAB>type field; type field". */
+static int synth_events_seq_show(struct seq_file *m, void *v)
+{
+       struct synth_event *event = v;
+       unsigned int i;
+
+       seq_printf(m, "%s\t", event->name);
+
+       for (i = 0; i < event->n_fields; i++) {
+               struct synth_field *field = event->fields[i];
+
+               /* "; " separator between fields, none after the last */
+               if (i)
+                       seq_puts(m, "; ");
+               seq_printf(m, "%s %s", field->type, field->name);
+       }
+
+       seq_putc(m, '\n');
+
+       return 0;
+}
+
+/* seq_file iterator over synth_event_list (mutex held across a read). */
+static const struct seq_operations synth_events_seq_op = {
+       .start  = synth_events_seq_start,
+       .next   = synth_events_seq_next,
+       .stop   = synth_events_seq_stop,
+       .show   = synth_events_seq_show
+};
+
+/*
+ * Open handler for the synthetic_events file.  A writable open with
+ * O_TRUNC means "delete all synthetic events" before the seq read.
+ */
+static int synth_events_open(struct inode *inode, struct file *file)
+{
+       int ret;
+
+       if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
+               ret = release_all_synth_events();
+               if (ret < 0)
+                       return ret;
+       }
+
+       return seq_open(file, &synth_events_seq_op);
+}
+
+/* Feed each command line written to the file through create_synth_event(). */
+static ssize_t synth_events_write(struct file *file,
+                                 const char __user *buffer,
+                                 size_t count, loff_t *ppos)
+{
+       return trace_parse_run_command(file, buffer, count, ppos,
+                                      create_synth_event);
+}
+
+/* File operations for the synthetic_events control file. */
+static const struct file_operations synth_events_fops = {
+       .open           = synth_events_open,
+       .write          = synth_events_write,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = seq_release,
+};
+
+/*
+ * hist_field fn: the event's ring-buffer timestamp, converted to usecs
+ * when the trigger asked for usecs and the trace clock counts in ns.
+ */
+static u64 hist_field_timestamp(struct hist_field *hist_field,
+                               struct tracing_map_elt *elt,
+                               struct ring_buffer_event *rbe,
+                               void *event)
+{
+       struct hist_trigger_data *hist_data = hist_field->hist_data;
+       struct trace_array *tr = hist_data->event_file->tr;
+
+       u64 ts = ring_buffer_event_time_stamp(rbe);
+
+       if (hist_data->attrs->ts_in_usecs && trace_clock_in_ns(tr))
+               ts = ns2usecs(ts);
+
+       return ts;
+}
+
+/* hist_field fn: the CPU the event is being processed on. */
+static u64 hist_field_cpu(struct hist_field *hist_field,
+                         struct tracing_map_elt *elt,
+                         struct ring_buffer_event *rbe,
+                         void *event)
+{
+       return (u64)smp_processor_id();
+}
+
+/*
+ * Return hist_field if it is a reference to the variable identified by
+ * (var_data, var_idx), otherwise NULL.
+ */
+static struct hist_field *
+check_field_for_var_ref(struct hist_field *hist_field,
+                       struct hist_trigger_data *var_data,
+                       unsigned int var_idx)
+{
+       if (!hist_field || !(hist_field->flags & HIST_FIELD_FL_VAR_REF))
+               return NULL;
+
+       if (hist_field->var.idx == var_idx &&
+           hist_field->var.hist_data == var_data)
+               return hist_field;
+
+       return NULL;
+}
+
+/*
+ * Recursively search hist_field and its operands (capped at 3 levels)
+ * for a reference to the variable (var_data, var_idx).  Returns the
+ * matching field or NULL.
+ */
+static struct hist_field *
+check_field_for_var_refs(struct hist_trigger_data *hist_data,
+                        struct hist_field *hist_field,
+                        struct hist_trigger_data *var_data,
+                        unsigned int var_idx,
+                        unsigned int level)
+{
+       struct hist_field *found = NULL;
+
+       unsigned int i;
+
+       if (level > 3)
+               return found;
+
+       if (!hist_field)
+               return found;
+
+       found = check_field_for_var_ref(hist_field, var_data, var_idx);
+       if (found)
+               return found;
+
+       for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) {
+               struct hist_field *operand;
+
+               operand = hist_field->operands[i];
+               found = check_field_for_var_refs(hist_data, operand, var_data,
+                                                var_idx, level + 1);
+               if (found)
+                       return found;
+       }
+
+       return found;
+}
+
+/*
+ * Search all of hist_data's fields, including its synthetic-event
+ * variable references, for a reference to variable var_idx defined by
+ * var_data.  Returns the referencing field or NULL.
+ */
+static struct hist_field *find_var_ref(struct hist_trigger_data *hist_data,
+                                      struct hist_trigger_data *var_data,
+                                      unsigned int var_idx)
+{
+       struct hist_field *hist_field, *found = NULL;
+       unsigned int i;
+
+       for_each_hist_field(i, hist_data) {
+               hist_field = hist_data->fields[i];
+               found = check_field_for_var_refs(hist_data, hist_field,
+                                                var_data, var_idx, 0);
+               if (found)
+                       return found;
+       }
+
+       for (i = 0; i < hist_data->n_synth_var_refs; i++) {
+               hist_field = hist_data->synth_var_refs[i];
+               found = check_field_for_var_refs(hist_data, hist_field,
+                                                var_data, var_idx, 0);
+               if (found)
+                       return found;
+       }
+
+       return found;
+}
+
+/*
+ * Scan every other variable-defining trigger on this trace array for a
+ * reference to variable var_idx defined by hist_data.
+ */
+static struct hist_field *find_any_var_ref(struct hist_trigger_data *hist_data,
+                                          unsigned int var_idx)
+{
+       struct trace_array *tr = hist_data->event_file->tr;
+       struct hist_field *found = NULL;
+       struct hist_var_data *var_data;
+
+       list_for_each_entry(var_data, &tr->hist_vars, list) {
+               if (var_data->hist_data == hist_data)
+                       continue;
+               found = find_var_ref(var_data->hist_data, hist_data, var_idx);
+               if (found)
+                       break;
+       }
+
+       return found;
+}
+
+/*
+ * True if any variable defined by this trigger is still referenced by
+ * another trigger; such a trigger must not be torn down.
+ */
+static bool check_var_refs(struct hist_trigger_data *hist_data)
+{
+       struct hist_field *field;
+       int i;
+
+       for_each_hist_field(i, hist_data) {
+               field = hist_data->fields[i];
+               if (!field || !(field->flags & HIST_FIELD_FL_VAR))
+                       continue;
+               if (find_any_var_ref(hist_data, field->var.idx))
+                       return true;
+       }
+
+       return false;
+}
+
+/* Look up the tr->hist_vars entry registered for hist_data, if any. */
+static struct hist_var_data *find_hist_vars(struct hist_trigger_data *hist_data)
+{
+       struct trace_array *tr = hist_data->event_file->tr;
+       struct hist_var_data *var_data;
+
+       list_for_each_entry(var_data, &tr->hist_vars, list) {
+               if (var_data->hist_data == hist_data)
+                       return var_data;
+       }
+
+       return NULL;
+}
+
+/*
+ * True if hist_field, or any of its operands up to 3 levels deep,
+ * defines or references a histogram variable.
+ */
+static bool field_has_hist_vars(struct hist_field *hist_field,
+                               unsigned int level)
+{
+       int i;
+
+       if (level > 3)
+               return false;
+
+       if (!hist_field)
+               return false;
+
+       if (hist_field->flags & HIST_FIELD_FL_VAR ||
+           hist_field->flags & HIST_FIELD_FL_VAR_REF)
+               return true;
+
+       for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) {
+               struct hist_field *operand;
+
+               operand = hist_field->operands[i];
+               if (field_has_hist_vars(operand, level + 1))
+                       return true;
+       }
+
+       return false;
+}
+
+/* True if any field of this trigger defines or references a variable. */
+static bool has_hist_vars(struct hist_trigger_data *hist_data)
+{
+       int i;
+
+       for_each_hist_field(i, hist_data) {
+               if (field_has_hist_vars(hist_data->fields[i], 0))
+                       return true;
+       }
+
+       return false;
+}
+
+/*
+ * Publish hist_data's variables by linking it onto tr->hist_vars
+ * (a no-op if already present).  Pins the trace array with
+ * trace_array_get().  Returns 0, -ENODEV or -ENOMEM.
+ */
+static int save_hist_vars(struct hist_trigger_data *hist_data)
+{
+       struct trace_array *tr = hist_data->event_file->tr;
+       struct hist_var_data *var_data;
+
+       var_data = find_hist_vars(hist_data);
+       if (var_data)
+               return 0;
+
+       if (trace_array_get(tr) < 0)
+               return -ENODEV;
+
+       var_data = kzalloc(sizeof(*var_data), GFP_KERNEL);
+       if (!var_data) {
+               trace_array_put(tr);
+               return -ENOMEM;
+       }
+
+       var_data->hist_data = hist_data;
+       list_add(&var_data->list, &tr->hist_vars);
+
+       return 0;
+}
+
+/*
+ * Undo save_hist_vars(): unlink hist_data from tr->hist_vars and drop
+ * the trace-array reference.  Refuses (with a WARN) while other
+ * triggers still reference its variables.
+ */
+static void remove_hist_vars(struct hist_trigger_data *hist_data)
+{
+       struct trace_array *tr = hist_data->event_file->tr;
+       struct hist_var_data *var_data;
+
+       var_data = find_hist_vars(hist_data);
+       if (!var_data)
+               return;
+
+       if (WARN_ON(check_var_refs(hist_data)))
+               return;
+
+       list_del(&var_data->list);
+
+       kfree(var_data);
+
+       trace_array_put(tr);
+}
+
+/* Find the variable named var_name defined by this trigger, if any. */
+static struct hist_field *find_var_field(struct hist_trigger_data *hist_data,
+                                        const char *var_name)
+{
+       struct hist_field *hist_field;
+       int i;
+
+       for_each_hist_field(i, hist_data) {
+               hist_field = hist_data->fields[i];
+               if (!hist_field || !(hist_field->flags & HIST_FIELD_FL_VAR))
+                       continue;
+               if (strcmp(hist_field->var.name, var_name) == 0)
+                       return hist_field;
+       }
+
+       return NULL;
+}
+
+/*
+ * Resolve var_name: first against hist_data itself, then against every
+ * hist trigger attached to 'file'.  NOTE(review): the _rcu list walk
+ * presumably relies on the caller holding event_mutex -- confirm.
+ */
+static struct hist_field *find_var(struct hist_trigger_data *hist_data,
+                                  struct trace_event_file *file,
+                                  const char *var_name)
+{
+       struct hist_trigger_data *test_data;
+       struct event_trigger_data *test;
+       struct hist_field *hist_field;
+
+       hist_field = find_var_field(hist_data, var_name);
+       if (hist_field)
+               return hist_field;
+
+       list_for_each_entry_rcu(test, &file->triggers, list) {
+               if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+                       test_data = test->private_data;
+                       hist_field = find_var_field(test_data, var_name);
+                       if (hist_field)
+                               return hist_field;
+               }
+       }
+
+       return NULL;
+}
+
+/*
+ * Find the event file that defines var_name.  With an explicit system
+ * the event file is looked up directly; otherwise every variable-
+ * defining trigger on tr is scanned, and an ambiguous name (defined by
+ * two different files) is reported and makes the lookup fail.
+ */
+static struct trace_event_file *find_var_file(struct trace_array *tr,
+                                             char *system,
+                                             char *event_name,
+                                             char *var_name)
+{
+       struct hist_trigger_data *var_hist_data;
+       struct hist_var_data *var_data;
+       struct trace_event_file *file, *found = NULL;
+
+       if (system)
+               return find_event_file(tr, system, event_name);
+
+       list_for_each_entry(var_data, &tr->hist_vars, list) {
+               var_hist_data = var_data->hist_data;
+               file = var_hist_data->event_file;
+               /* several triggers may share a file; count it once */
+               if (file == found)
+                       continue;
+
+               if (find_var_field(var_hist_data, var_name)) {
+                       if (found) {
+                               hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name);
+                               return NULL;
+                       }
+
+                       found = file;
+               }
+       }
+
+       return found;
+}
+
+/*
+ * Search every hist trigger attached to 'file' for a variable named
+ * var_name.  (Same trigger walk as the tail of find_var().)
+ */
+static struct hist_field *find_file_var(struct trace_event_file *file,
+                                       const char *var_name)
+{
+       struct hist_trigger_data *test_data;
+       struct event_trigger_data *test;
+       struct hist_field *hist_field;
+
+       list_for_each_entry_rcu(test, &file->triggers, list) {
+               if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+                       test_data = test->private_data;
+                       hist_field = find_var_field(test_data, var_name);
+                       if (hist_field)
+                               return hist_field;
+               }
+       }
+
+       return NULL;
+}
+
+/*
+ * For each onmatch() action of hist_data, look for var_name in the
+ * match event's file.  Returns the field, NULL when not found, or
+ * ERR_PTR(-EINVAL) when the name is ambiguous across actions.
+ */
+static struct hist_field *
+find_match_var(struct hist_trigger_data *hist_data, char *var_name)
+{
+       struct trace_array *tr = hist_data->event_file->tr;
+       struct hist_field *hist_field, *found = NULL;
+       struct trace_event_file *file;
+       unsigned int i;
+
+       for (i = 0; i < hist_data->n_actions; i++) {
+               struct action_data *data = hist_data->actions[i];
+
+               /* only onmatch actions (action_trace handler) apply */
+               if (data->fn == action_trace) {
+                       char *system = data->onmatch.match_event_system;
+                       char *event_name = data->onmatch.match_event;
+
+                       file = find_var_file(tr, system, event_name, var_name);
+                       if (!file)
+                               continue;
+                       hist_field = find_file_var(file, var_name);
+                       if (hist_field) {
+                               if (found) {
+                                       hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name);
+                                       return ERR_PTR(-EINVAL);
+                               }
+
+                               found = hist_field;
+                       }
+               }
+       }
+       return found;
+}
+
+/*
+ * Top-level variable lookup: when system/event are not given, try the
+ * onmatch match-event files first, then fall back to an explicit
+ * find_var_file()/find_file_var() lookup.  Returns NULL on failure
+ * (including an ambiguous match-var lookup).
+ */
+static struct hist_field *find_event_var(struct hist_trigger_data *hist_data,
+                                        char *system,
+                                        char *event_name,
+                                        char *var_name)
+{
+       struct trace_array *tr = hist_data->event_file->tr;
+       struct hist_field *hist_field = NULL;
+       struct trace_event_file *file;
+
+       if (!system || !event_name) {
+               hist_field = find_match_var(hist_data, var_name);
+               if (IS_ERR(hist_field))
+                       return NULL;
+               if (hist_field)
+                       return hist_field;
+       }
+
+       file = find_var_file(tr, system, event_name, var_name);
+       if (!file)
+               return NULL;
+
+       hist_field = find_file_var(file, var_name);
+
+       return hist_field;
+}
+
+/*
+ * Per-tracing-map-element private data: saved comm for .execname keys,
+ * resolved variable-reference values, and string storage for field
+ * variables.
+ */
+struct hist_elt_data {
+       char *comm;
+       u64 *var_ref_vals;
+       char *field_var_str[SYNTH_FIELDS_MAX];
+};
+
+/* hist_field fn: read back a previously resolved variable-reference value. */
+static u64 hist_field_var_ref(struct hist_field *hist_field,
+                             struct tracing_map_elt *elt,
+                             struct ring_buffer_event *rbe,
+                             void *event)
+{
+       struct hist_elt_data *elt_data = elt->private_data;
+
+       return elt_data->var_ref_vals[hist_field->var_ref_idx];
+}
+
+/*
+ * Resolve every variable reference of hist_data for the map entry
+ * matching 'key', filling var_ref_vals[] parallel to var_refs[].
+ * With self==true only variables defined by hist_data itself are
+ * considered; with self==false only foreign ones.  Returns false when
+ * any reference cannot be resolved yet (unknown origin, no map entry,
+ * or the variable has not been set).
+ */
+static bool resolve_var_refs(struct hist_trigger_data *hist_data, void *key,
+                            u64 *var_ref_vals, bool self)
+{
+       struct hist_trigger_data *var_data;
+       struct tracing_map_elt *var_elt;
+       struct hist_field *hist_field;
+       unsigned int i, var_idx;
+       bool resolved = true;
+       u64 var_val = 0;
+
+       for (i = 0; i < hist_data->n_var_refs; i++) {
+               hist_field = hist_data->var_refs[i];
+               var_idx = hist_field->var.idx;
+               var_data = hist_field->var.hist_data;
+
+               if (var_data == NULL) {
+                       resolved = false;
+                       break;
+               }
+
+               if ((self && var_data != hist_data) ||
+                   (!self && var_data == hist_data))
+                       continue;
+
+               var_elt = tracing_map_lookup(var_data->map, key);
+               if (!var_elt) {
+                       resolved = false;
+                       break;
+               }
+
+               if (!tracing_map_var_set(var_elt, var_idx)) {
+                       resolved = false;
+                       break;
+               }
+
+               /*
+                * read-once refs presumably consume the value on read
+                * (tracing_map_read_var_once) -- TODO confirm semantics.
+                */
+               if (self || !hist_field->read_once)
+                       var_val = tracing_map_read_var(var_elt, var_idx);
+               else
+                       var_val = tracing_map_read_var_once(var_elt, var_idx);
+
+               var_ref_vals[i] = var_val;
+       }
+
+       return resolved;
+}
+
+/*
+ * Return the display name for a hist_field: the underlying ftrace
+ * field name, "cpu", "common_timestamp", or the field's own name --
+ * qualified as "system.event.name" for cross-event references.  The
+ * qualified form lives in a static buffer, so the returned pointer is
+ * only valid until the next call.
+ */
+static const char *hist_field_name(struct hist_field *field,
+                                  unsigned int level)
+{
+       const char *field_name = "";
+
+       if (level > 1)
+               return field_name;
+
+       if (field->field)
+               field_name = field->field->name;
+       else if (field->flags & HIST_FIELD_FL_LOG2 ||
+                field->flags & HIST_FIELD_FL_ALIAS)
+               field_name = hist_field_name(field->operands[0], ++level);
+       else if (field->flags & HIST_FIELD_FL_CPU)
+               field_name = "cpu";
+       else if (field->flags & HIST_FIELD_FL_EXPR ||
+                field->flags & HIST_FIELD_FL_VAR_REF) {
+               if (field->system) {
+                       static char full_name[MAX_FILTER_STR_VAL];
+
+                       /*
+                        * Build the name with a bounded single-shot
+                        * write: the previous strcat() chain never reset
+                        * the static buffer, so every call after the
+                        * first appended to the prior contents and could
+                        * eventually overflow it.
+                        */
+                       snprintf(full_name, sizeof(full_name), "%s.%s.%s",
+                                field->system, field->event_name,
+                                field->name);
+                       field_name = full_name;
+               } else
+                       field_name = field->name;
+       } else if (field->flags & HIST_FIELD_FL_TIMESTAMP)
+               field_name = "common_timestamp";
+
+       if (field_name == NULL)
+               field_name = "";
+
+       return field_name;
+}
+
+/*
+ * Pick the fetch function matching a raw field's size and signedness;
+ * NULL for unsupported sizes.
+ */
+static hist_field_fn_t select_value_fn(int field_size, int field_is_signed)
+{
+       switch (field_size) {
+       case 8:
+               return field_is_signed ? hist_field_s64 : hist_field_u64;
+       case 4:
+               return field_is_signed ? hist_field_s32 : hist_field_u32;
+       case 2:
+               return field_is_signed ? hist_field_s16 : hist_field_u16;
+       case 1:
+               return field_is_signed ? hist_field_s8 : hist_field_u8;
+       default:
+               return NULL;
+       }
+}
+
+/*
+ * Parse a "size=<n>" assignment and convert <n> to tracing-map bits
+ * (log2 of the next power of two).  Returns the bit count, or a
+ * negative errno for malformed input or an out-of-range size.
+ */
+static int parse_map_size(char *str)
+{
+       unsigned long size, map_bits;
+       int ret;
+
+       strsep(&str, "=");
+       if (!str) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       ret = kstrtoul(str, 0, &size);
+       if (ret)
+               goto out;
+
+       /* roundup_pow_of_two(0) is undefined; reject a zero size here */
+       if (!size) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       map_bits = ilog2(roundup_pow_of_two(size));
+       if (map_bits < TRACING_MAP_BITS_MIN ||
+           map_bits > TRACING_MAP_BITS_MAX)
+               ret = -EINVAL;
+       else
+               ret = map_bits;
+ out:
+       return ret;
+}
+
+/* Free a hist_trigger_attrs and every string it owns; NULL is a no-op. */
+static void destroy_hist_trigger_attrs(struct hist_trigger_attrs *attrs)
+{
+       unsigned int idx;
+
+       if (!attrs)
+               return;
+
+       for (idx = 0; idx < attrs->n_assignments; idx++)
+               kfree(attrs->assignment_str[idx]);
+
+       for (idx = 0; idx < attrs->n_actions; idx++)
+               kfree(attrs->action_str[idx]);
+
+       kfree(attrs->clock);
+       kfree(attrs->vals_str);
+       kfree(attrs->keys_str);
+       kfree(attrs->sort_key_str);
+       kfree(attrs->name);
+       kfree(attrs);
+}
+
+/*
+ * Record an "onmatch(...)" or "onmax(...)" action clause for later
+ * parsing.  Returns 0 on success, -EINVAL for an unrecognized clause
+ * or too many actions, -ENOMEM on allocation failure.
+ */
+static int parse_action(char *str, struct hist_trigger_attrs *attrs)
+{
+       char *action;
+
+       if (attrs->n_actions >= HIST_ACTIONS_MAX)
+               return -EINVAL;
+
+       if (strncmp(str, "onmatch(", strlen("onmatch(")) != 0 &&
+           strncmp(str, "onmax(", strlen("onmax(")) != 0)
+               return -EINVAL;
+
+       action = kstrdup(str, GFP_KERNEL);
+       if (!action)
+               return -ENOMEM;
+
+       attrs->action_str[attrs->n_actions++] = action;
+
+       return 0;
+}
+
+/*
+ * Parse one "name=value" token of a hist trigger string into attrs.
+ * Recognized keys: key(s)=, val(s)/values=, sort=, name=, clock=,
+ * size=; anything else is saved verbatim as a variable assignment for
+ * later parsing.  Returns 0 or a negative errno.
+ */
+static int parse_assignment(char *str, struct hist_trigger_attrs *attrs)
+{
+       int ret = 0;
+
+       if ((strncmp(str, "key=", strlen("key=")) == 0) ||
+           (strncmp(str, "keys=", strlen("keys=")) == 0)) {
+               attrs->keys_str = kstrdup(str, GFP_KERNEL);
+               if (!attrs->keys_str) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+       } else if ((strncmp(str, "val=", strlen("val=")) == 0) ||
+                (strncmp(str, "vals=", strlen("vals=")) == 0) ||
+                (strncmp(str, "values=", strlen("values=")) == 0)) {
+               attrs->vals_str = kstrdup(str, GFP_KERNEL);
+               if (!attrs->vals_str) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+       } else if (strncmp(str, "sort=", strlen("sort=")) == 0) {
+               attrs->sort_key_str = kstrdup(str, GFP_KERNEL);
+               if (!attrs->sort_key_str) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+       } else if (strncmp(str, "name=", strlen("name=")) == 0) {
+               attrs->name = kstrdup(str, GFP_KERNEL);
+               if (!attrs->name) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+       } else if (strncmp(str, "clock=", strlen("clock=")) == 0) {
+               /* keep only the value part, stripped of whitespace */
+               strsep(&str, "=");
+               if (!str) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               str = strstrip(str);
+               attrs->clock = kstrdup(str, GFP_KERNEL);
+               if (!attrs->clock) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+       } else if (strncmp(str, "size=", strlen("size=")) == 0) {
+               int map_bits = parse_map_size(str);
+
+               if (map_bits < 0) {
+                       ret = map_bits;
+                       goto out;
+               }
+               attrs->map_bits = map_bits;
+       } else {
+               char *assignment;
+
+               if (attrs->n_assignments == TRACING_MAP_VARS_MAX) {
+                       hist_err("Too many variables defined: ", str);
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               assignment = kstrdup(str, GFP_KERNEL);
+               if (!assignment) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+
+               attrs->assignment_str[attrs->n_assignments++] = assignment;
+       }
+ out:
+       return ret;
+}
+
+/*
+ * Split a hist trigger string on ':' and parse each token into a newly
+ * allocated hist_trigger_attrs.  "keys=" is mandatory; the clock
+ * defaults to "global".  Returns the attrs or an ERR_PTR(); on error
+ * everything allocated so far is freed.
+ */
+static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str)
+{
+       struct hist_trigger_attrs *attrs;
+       int ret = 0;
+
+       attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
+       if (!attrs)
+               return ERR_PTR(-ENOMEM);
+
+       while (trigger_str) {
+               char *str = strsep(&trigger_str, ":");
+
+               if (strchr(str, '=')) {
+                       ret = parse_assignment(str, attrs);
+                       if (ret)
+                               goto free;
+               } else if (strcmp(str, "pause") == 0)
+                       attrs->pause = true;
+               else if ((strcmp(str, "cont") == 0) ||
+                        (strcmp(str, "continue") == 0))
+                       attrs->cont = true;
+               else if (strcmp(str, "clear") == 0)
+                       attrs->clear = true;
+               else {
+                       ret = parse_action(str, attrs);
+                       if (ret)
+                               goto free;
+               }
+       }
+
+       if (!attrs->keys_str) {
+               ret = -EINVAL;
+               goto free;
+       }
+
+       if (!attrs->clock) {
+               attrs->clock = kstrdup("global", GFP_KERNEL);
+               if (!attrs->clock) {
+                       ret = -ENOMEM;
+                       goto free;
+               }
+       }
+
+       return attrs;
+ free:
+       destroy_hist_trigger_attrs(attrs);
+
+       return ERR_PTR(ret);
+}
+
+/*
+ * Snapshot task->comm into 'comm' (TASK_COMM_LEN bytes).  PID 0 is the
+ * idle task; a negative PID should be impossible and is flagged.
+ */
+static inline void save_comm(char *comm, struct task_struct *task)
+{
+       if (!task->pid) {
+               strcpy(comm, "<idle>");
+               return;
+       }
+
+       if (WARN_ON_ONCE(task->pid < 0)) {
+               strcpy(comm, "<XXX>");
+               return;
+       }
+
+       memcpy(comm, task->comm, TASK_COMM_LEN);
+}
+
+/* Free per-element data: comm buffer, all field-var strings, container. */
+static void hist_elt_data_free(struct hist_elt_data *elt_data)
+{
+       unsigned int idx;
+
+       kfree(elt_data->comm);
+
+       for (idx = 0; idx < SYNTH_FIELDS_MAX; idx++)
+               kfree(elt_data->field_var_str[idx]);
+
+       kfree(elt_data);
+}
+
+/* tracing_map elt_free callback: release the per-element private data. */
+static void hist_trigger_elt_data_free(struct tracing_map_elt *elt)
+{
+       struct hist_elt_data *elt_data = elt->private_data;
+
+       hist_elt_data_free(elt_data);
+}
+
+/*
+ * tracing_map elt_alloc callback: allocate per-element storage -- a
+ * comm buffer when any key field uses .execname, plus one
+ * STR_VAR_LEN_MAX buffer per field/max variable string.
+ *
+ * NOTE(review): n_str is n_field_var_str + n_max_var_str but
+ * field_var_str[] has only SYNTH_FIELDS_MAX slots -- confirm the sum
+ * cannot exceed that bound.
+ */
+static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt)
+{
+       struct hist_trigger_data *hist_data = elt->map->private_data;
+       unsigned int size = TASK_COMM_LEN;
+       struct hist_elt_data *elt_data;
+       struct hist_field *key_field;
+       unsigned int i, n_str;
+
+       elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL);
+       if (!elt_data)
+               return -ENOMEM;
+
+       for_each_hist_key_field(i, hist_data) {
+               key_field = hist_data->fields[i];
+
+               if (key_field->flags & HIST_FIELD_FL_EXECNAME) {
+                       elt_data->comm = kzalloc(size, GFP_KERNEL);
+                       if (!elt_data->comm) {
+                               kfree(elt_data);
+                               return -ENOMEM;
+                       }
+                       break;
+               }
+       }
+
+       n_str = hist_data->n_field_var_str + hist_data->n_max_var_str;
+
+       size = STR_VAR_LEN_MAX;
+
+       for (i = 0; i < n_str; i++) {
+               elt_data->field_var_str[i] = kzalloc(size, GFP_KERNEL);
+               if (!elt_data->field_var_str[i]) {
+                       hist_elt_data_free(elt_data);
+                       return -ENOMEM;
+               }
+       }
+
+       elt->private_data = elt_data;
+
+       return 0;
+}
+
+/* tracing_map elt_init callback: (re)capture current->comm if tracked. */
+static void hist_trigger_elt_data_init(struct tracing_map_elt *elt)
+{
+       struct hist_elt_data *elt_data = elt->private_data;
+
+       if (elt_data->comm)
+               save_comm(elt_data->comm, current);
+}
+
+/* tracing_map callbacks for histogram elements. */
+static const struct tracing_map_ops hist_trigger_elt_data_ops = {
+       .elt_alloc      = hist_trigger_elt_data_alloc,
+       .elt_free       = hist_trigger_elt_data_free,
+       .elt_init       = hist_trigger_elt_data_init,
+};
+
+/*
+ * Map a field's modifier flag to its trigger-syntax suffix string, or
+ * NULL when no modifier is set.
+ */
+static const char *get_hist_field_flags(struct hist_field *hist_field)
+{
+       if (hist_field->flags & HIST_FIELD_FL_HEX)
+               return "hex";
+       if (hist_field->flags & HIST_FIELD_FL_SYM)
+               return "sym";
+       if (hist_field->flags & HIST_FIELD_FL_SYM_OFFSET)
+               return "sym-offset";
+       if (hist_field->flags & HIST_FIELD_FL_EXECNAME)
+               return "execname";
+       if (hist_field->flags & HIST_FIELD_FL_SYSCALL)
+               return "syscall";
+       if (hist_field->flags & HIST_FIELD_FL_LOG2)
+               return "log2";
+       if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS)
+               return "usecs";
+
+       return NULL;
+}
+
+/*
+ * Append one operand to 'expr': a '$' prefix for variable references,
+ * the field name, and any modifier suffix (.hex, .log2, ...) for
+ * non-reference fields.
+ */
+static void expr_field_str(struct hist_field *field, char *expr)
+{
+       if (field->flags & HIST_FIELD_FL_VAR_REF)
+               strcat(expr, "$");
+
+       strcat(expr, hist_field_name(field, 0));
+
+       if (field->flags && !(field->flags & HIST_FIELD_FL_VAR_REF)) {
+               const char *flags_str = get_hist_field_flags(field);
+
+               if (flags_str) {
+                       strcat(expr, ".");
+                       strcat(expr, flags_str);
+               }
+       }
+}
+
+/*
+ * Render a hist_field expression back into a newly kzalloc'd string
+ * (caller frees).  Handles unary minus and one binary +/- level;
+ * returns NULL on allocation failure or when recursion exceeds one
+ * level.
+ */
+static char *expr_str(struct hist_field *field, unsigned int level)
+{
+       char *expr;
+
+       if (level > 1)
+               return NULL;
+
+       expr = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL);
+       if (!expr)
+               return NULL;
+
+       /* leaf field: just its name and modifiers */
+       if (!field->operands[0]) {
+               expr_field_str(field, expr);
+               return expr;
+       }
+
+       if (field->operator == FIELD_OP_UNARY_MINUS) {
+               char *subexpr;
+
+               strcat(expr, "-(");
+               subexpr = expr_str(field->operands[0], ++level);
+               if (!subexpr) {
+                       kfree(expr);
+                       return NULL;
+               }
+               strcat(expr, subexpr);
+               strcat(expr, ")");
+
+               kfree(subexpr);
+
+               return expr;
+       }
+
+       expr_field_str(field->operands[0], expr);
+
+       switch (field->operator) {
+       case FIELD_OP_MINUS:
+               strcat(expr, "-");
+               break;
+       case FIELD_OP_PLUS:
+               strcat(expr, "+");
+               break;
+       default:
+               kfree(expr);
+               return NULL;
+       }
+
+       expr_field_str(field->operands[1], expr);
+
+       return expr;
+}
+
+static int contains_operator(char *str)
+{
+       enum field_op_id field_op = FIELD_OP_NONE;
+       char *op;
+
+       op = strpbrk(str, "+-");
+       if (!op)
+               return FIELD_OP_NONE;
+
+       switch (*op) {
+       case '-':
+               if (*str == '-')
+                       field_op = FIELD_OP_UNARY_MINUS;
+               else
+                       field_op = FIELD_OP_MINUS;
+               break;
+       case '+':
+               field_op = FIELD_OP_PLUS;
+               break;
+       default:
+               break;
+       }
+
+       return field_op;
+}
+
+static void destroy_hist_field(struct hist_field *hist_field,
+                              unsigned int level)
+{
+       unsigned int i;
+
+       if (level > 3)
+               return;
+
+       if (!hist_field)
+               return;
+
+       for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++)
+               destroy_hist_field(hist_field->operands[i], level + 1);
+
+       kfree(hist_field->var.name);
+       kfree(hist_field->name);
+       kfree(hist_field->type);
+
+       kfree(hist_field);
+}
+
+static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
+                                           struct ftrace_event_field *field,
+                                           unsigned long flags,
+                                           char *var_name)
+{
+       struct hist_field *hist_field;
+
+       if (field && is_function_field(field))
+               return NULL;
+
+       hist_field = kzalloc(sizeof(struct hist_field), GFP_KERNEL);
+       if (!hist_field)
+               return NULL;
+
+       hist_field->hist_data = hist_data;
+
+       if (flags & HIST_FIELD_FL_EXPR || flags & HIST_FIELD_FL_ALIAS)
+               goto out; /* caller will populate */
+
+       if (flags & HIST_FIELD_FL_VAR_REF) {
+               hist_field->fn = hist_field_var_ref;
+               goto out;
+       }
+
+       if (flags & HIST_FIELD_FL_HITCOUNT) {
+               hist_field->fn = hist_field_counter;
+               hist_field->size = sizeof(u64);
+               hist_field->type = kstrdup("u64", GFP_KERNEL);
+               if (!hist_field->type)
+                       goto free;
+               goto out;
+       }
+
+       if (flags & HIST_FIELD_FL_STACKTRACE) {
+               hist_field->fn = hist_field_none;
+               goto out;
+       }
+
+       if (flags & HIST_FIELD_FL_LOG2) {
                unsigned long fl = flags & ~HIST_FIELD_FL_LOG2;
                hist_field->fn = hist_field_log2;
-               hist_field->operands[0] = create_hist_field(field, fl);
+               hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL);
                hist_field->size = hist_field->operands[0]->size;
+               hist_field->type = kstrdup(hist_field->operands[0]->type, GFP_KERNEL);
+               if (!hist_field->type)
+                       goto free;
+               goto out;
+       }
+
+       if (flags & HIST_FIELD_FL_TIMESTAMP) {
+               hist_field->fn = hist_field_timestamp;
+               hist_field->size = sizeof(u64);
+               hist_field->type = kstrdup("u64", GFP_KERNEL);
+               if (!hist_field->type)
+                       goto free;
+               goto out;
+       }
+
+       if (flags & HIST_FIELD_FL_CPU) {
+               hist_field->fn = hist_field_cpu;
+               hist_field->size = sizeof(int);
+               hist_field->type = kstrdup("unsigned int", GFP_KERNEL);
+               if (!hist_field->type)
+                       goto free;
+               goto out;
+       }
+
+       if (WARN_ON_ONCE(!field))
+               goto out;
+
+       if (is_string_field(field)) {
+               flags |= HIST_FIELD_FL_STRING;
+
+               hist_field->size = MAX_FILTER_STR_VAL;
+               hist_field->type = kstrdup(field->type, GFP_KERNEL);
+               if (!hist_field->type)
+                       goto free;
+
+               if (field->filter_type == FILTER_STATIC_STRING)
+                       hist_field->fn = hist_field_string;
+               else if (field->filter_type == FILTER_DYN_STRING)
+                       hist_field->fn = hist_field_dynstring;
+               else
+                       hist_field->fn = hist_field_pstring;
+       } else {
+               hist_field->size = field->size;
+               hist_field->is_signed = field->is_signed;
+               hist_field->type = kstrdup(field->type, GFP_KERNEL);
+               if (!hist_field->type)
+                       goto free;
+
+               hist_field->fn = select_value_fn(field->size,
+                                                field->is_signed);
+               if (!hist_field->fn) {
+                       destroy_hist_field(hist_field, 0);
+                       return NULL;
+               }
+       }
+ out:
+       hist_field->field = field;
+       hist_field->flags = flags;
+
+       if (var_name) {
+               hist_field->var.name = kstrdup(var_name, GFP_KERNEL);
+               if (!hist_field->var.name)
+                       goto free;
+       }
+
+       return hist_field;
+ free:
+       destroy_hist_field(hist_field, 0);
+       return NULL;
+}
+
+static void destroy_hist_fields(struct hist_trigger_data *hist_data)
+{
+       unsigned int i;
+
+       for (i = 0; i < HIST_FIELDS_MAX; i++) {
+               if (hist_data->fields[i]) {
+                       destroy_hist_field(hist_data->fields[i], 0);
+                       hist_data->fields[i] = NULL;
+               }
+       }
+}
+
+static int init_var_ref(struct hist_field *ref_field,
+                       struct hist_field *var_field,
+                       char *system, char *event_name)
+{
+       int err = 0;
+
+       ref_field->var.idx = var_field->var.idx;
+       ref_field->var.hist_data = var_field->hist_data;
+       ref_field->size = var_field->size;
+       ref_field->is_signed = var_field->is_signed;
+       ref_field->flags |= var_field->flags &
+               (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
+
+       if (system) {
+               ref_field->system = kstrdup(system, GFP_KERNEL);
+               if (!ref_field->system)
+                       return -ENOMEM;
+       }
+
+       if (event_name) {
+               ref_field->event_name = kstrdup(event_name, GFP_KERNEL);
+               if (!ref_field->event_name) {
+                       err = -ENOMEM;
+                       goto free;
+               }
+       }
+
+       if (var_field->var.name) {
+               ref_field->name = kstrdup(var_field->var.name, GFP_KERNEL);
+               if (!ref_field->name) {
+                       err = -ENOMEM;
+                       goto free;
+               }
+       } else if (var_field->name) {
+               ref_field->name = kstrdup(var_field->name, GFP_KERNEL);
+               if (!ref_field->name) {
+                       err = -ENOMEM;
+                       goto free;
+               }
+       }
+
+       ref_field->type = kstrdup(var_field->type, GFP_KERNEL);
+       if (!ref_field->type) {
+               err = -ENOMEM;
+               goto free;
+       }
+ out:
+       return err;
+ free:
+       kfree(ref_field->system);
+       kfree(ref_field->event_name);
+       kfree(ref_field->name);
+
+       goto out;
+}
+
+static struct hist_field *create_var_ref(struct hist_field *var_field,
+                                        char *system, char *event_name)
+{
+       unsigned long flags = HIST_FIELD_FL_VAR_REF;
+       struct hist_field *ref_field;
+
+       ref_field = create_hist_field(var_field->hist_data, NULL, flags, NULL);
+       if (ref_field) {
+               if (init_var_ref(ref_field, var_field, system, event_name)) {
+                       destroy_hist_field(ref_field, 0);
+                       return NULL;
+               }
+       }
+
+       return ref_field;
+}
+
/* A variable reference is "$<name>": a '$' followed by at least one char. */
static bool is_var_ref(char *var_name)
{
	return var_name && var_name[0] == '$' && var_name[1] != '\0';
}
+
+static char *field_name_from_var(struct hist_trigger_data *hist_data,
+                                char *var_name)
+{
+       char *name, *field;
+       unsigned int i;
+
+       for (i = 0; i < hist_data->attrs->var_defs.n_vars; i++) {
+               name = hist_data->attrs->var_defs.name[i];
+
+               if (strcmp(var_name, name) == 0) {
+                       field = hist_data->attrs->var_defs.expr[i];
+                       if (contains_operator(field) || is_var_ref(field))
+                               continue;
+                       return field;
+               }
+       }
+
+       return NULL;
+}
+
+static char *local_field_var_ref(struct hist_trigger_data *hist_data,
+                                char *system, char *event_name,
+                                char *var_name)
+{
+       struct trace_event_call *call;
+
+       if (system && event_name) {
+               call = hist_data->event_file->event_call;
+
+               if (strcmp(system, call->class->system) != 0)
+                       return NULL;
+
+               if (strcmp(event_name, trace_event_name(call)) != 0)
+                       return NULL;
+       }
+
+       if (!!system != !!event_name)
+               return NULL;
+
+       if (!is_var_ref(var_name))
+               return NULL;
+
+       var_name++;
+
+       return field_name_from_var(hist_data, var_name);
+}
+
+static struct hist_field *parse_var_ref(struct hist_trigger_data *hist_data,
+                                       char *system, char *event_name,
+                                       char *var_name)
+{
+       struct hist_field *var_field = NULL, *ref_field = NULL;
+
+       if (!is_var_ref(var_name))
+               return NULL;
+
+       var_name++;
+
+       var_field = find_event_var(hist_data, system, event_name, var_name);
+       if (var_field)
+               ref_field = create_var_ref(var_field, system, event_name);
+
+       if (!ref_field)
+               hist_err_event("Couldn't find variable: $",
+                              system, event_name, var_name);
+
+       return ref_field;
+}
+
+static struct ftrace_event_field *
+parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
+           char *field_str, unsigned long *flags)
+{
+       struct ftrace_event_field *field = NULL;
+       char *field_name, *modifier, *str;
+
+       modifier = str = kstrdup(field_str, GFP_KERNEL);
+       if (!modifier)
+               return ERR_PTR(-ENOMEM);
+
+       field_name = strsep(&modifier, ".");
+       if (modifier) {
+               if (strcmp(modifier, "hex") == 0)
+                       *flags |= HIST_FIELD_FL_HEX;
+               else if (strcmp(modifier, "sym") == 0)
+                       *flags |= HIST_FIELD_FL_SYM;
+               else if (strcmp(modifier, "sym-offset") == 0)
+                       *flags |= HIST_FIELD_FL_SYM_OFFSET;
+               else if ((strcmp(modifier, "execname") == 0) &&
+                        (strcmp(field_name, "common_pid") == 0))
+                       *flags |= HIST_FIELD_FL_EXECNAME;
+               else if (strcmp(modifier, "syscall") == 0)
+                       *flags |= HIST_FIELD_FL_SYSCALL;
+               else if (strcmp(modifier, "log2") == 0)
+                       *flags |= HIST_FIELD_FL_LOG2;
+               else if (strcmp(modifier, "usecs") == 0)
+                       *flags |= HIST_FIELD_FL_TIMESTAMP_USECS;
+               else {
+                       field = ERR_PTR(-EINVAL);
+                       goto out;
+               }
+       }
+
+       if (strcmp(field_name, "common_timestamp") == 0) {
+               *flags |= HIST_FIELD_FL_TIMESTAMP;
+               hist_data->enable_timestamps = true;
+               if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS)
+                       hist_data->attrs->ts_in_usecs = true;
+       } else if (strcmp(field_name, "cpu") == 0)
+               *flags |= HIST_FIELD_FL_CPU;
+       else {
+               field = trace_find_event_field(file->event_call, field_name);
+               if (!field || !field->size) {
+                       field = ERR_PTR(-EINVAL);
+                       goto out;
+               }
+       }
+ out:
+       kfree(str);
+
+       return field;
+}
+
+static struct hist_field *create_alias(struct hist_trigger_data *hist_data,
+                                      struct hist_field *var_ref,
+                                      char *var_name)
+{
+       struct hist_field *alias = NULL;
+       unsigned long flags = HIST_FIELD_FL_ALIAS | HIST_FIELD_FL_VAR;
+
+       alias = create_hist_field(hist_data, NULL, flags, var_name);
+       if (!alias)
+               return NULL;
+
+       alias->fn = var_ref->fn;
+       alias->operands[0] = var_ref;
+
+       if (init_var_ref(alias, var_ref, var_ref->system, var_ref->event_name)) {
+               destroy_hist_field(alias, 0);
+               return NULL;
+       }
+
+       return alias;
+}
+
/*
 * Parse a single operand: a variable reference ("$var" or
 * "system.event.$var"), a local field-variable alias, or a plain event
 * field with optional modifier.  Returns a hist_field on success or an
 * ERR_PTR() on failure.  Successfully-parsed var refs are registered
 * in hist_data->var_refs[] as a side effect.
 */
static struct hist_field *parse_atom(struct hist_trigger_data *hist_data,
				     struct trace_event_file *file, char *str,
				     unsigned long *flags, char *var_name)
{
	char *s, *ref_system = NULL, *ref_event = NULL, *ref_var = str;
	struct ftrace_event_field *field = NULL;
	struct hist_field *hist_field = NULL;
	int ret = 0;

	/* Two dots mean a fully qualified "system.event.$var" reference. */
	s = strchr(str, '.');
	if (s) {
		s = strchr(++s, '.');
		if (s) {
			ref_system = strsep(&str, ".");
			if (!str) {
				ret = -EINVAL;
				goto out;
			}
			ref_event = strsep(&str, ".");
			if (!str) {
				ret = -EINVAL;
				goto out;
			}
			ref_var = str;
		}
	}

	/*
	 * If the "$var" actually names a field variable on this same
	 * event, resolve it to the underlying field name and fall
	 * through to the normal field parsing below.
	 */
	s = local_field_var_ref(hist_data, ref_system, ref_event, ref_var);
	if (!s) {
		hist_field = parse_var_ref(hist_data, ref_system, ref_event, ref_var);
		if (hist_field) {
			/* Register the reference for later resolution. */
			hist_data->var_refs[hist_data->n_var_refs] = hist_field;
			hist_field->var_ref_idx = hist_data->n_var_refs++;
			if (var_name) {
				/* "name=$var": wrap the ref in an alias. */
				hist_field = create_alias(hist_data, hist_field, var_name);
				if (!hist_field) {
					ret = -ENOMEM;
					goto out;
				}
			}
			return hist_field;
		}
	} else
		str = s;

	field = parse_field(hist_data, file, str, flags);
	if (IS_ERR(field)) {
		ret = PTR_ERR(field);
		goto out;
	}

	hist_field = create_hist_field(hist_data, field, *flags, var_name);
	if (!hist_field) {
		ret = -ENOMEM;
		goto out;
	}

	return hist_field;
 out:
	return ERR_PTR(ret);
}
+
+static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
+                                    struct trace_event_file *file,
+                                    char *str, unsigned long flags,
+                                    char *var_name, unsigned int level);
+
/*
 * Parse a unary minus expression of the form "-(subexpr)".  Explicit
 * parentheses are required.  @str is modified in place (the closing
 * ')' is overwritten with a NUL).  Returns the expression hist_field
 * or an ERR_PTR() on failure.
 */
static struct hist_field *parse_unary(struct hist_trigger_data *hist_data,
				      struct trace_event_file *file,
				      char *str, unsigned long flags,
				      char *var_name, unsigned int level)
{
	struct hist_field *operand1, *expr = NULL;
	unsigned long operand_flags;
	int ret = 0;
	char *s;

	/* we support only -(xxx) i.e. explicit parens required */

	if (level > 3) {
		hist_err("Too many subexpressions (3 max): ", str);
		ret = -EINVAL;
		goto free;
	}

	str++; /* skip leading '-' */

	s = strchr(str, '(');
	if (s)
		str++;	/* '(' must directly follow the '-'; skip it */
	else {
		ret = -EINVAL;
		goto free;
	}

	s = strrchr(str, ')');
	if (s)
		*s = '\0';	/* terminate the subexpression in place */
	else {
		ret = -EINVAL; /* no closing ')' */
		goto free;
	}

	flags |= HIST_FIELD_FL_EXPR;
	expr = create_hist_field(hist_data, NULL, flags, var_name);
	if (!expr) {
		ret = -ENOMEM;
		goto free;
	}

	operand_flags = 0;
	operand1 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level);
	if (IS_ERR(operand1)) {
		ret = PTR_ERR(operand1);
		goto free;
	}

	/* Propagate timestamp flags so unit checking sees the real units. */
	expr->flags |= operand1->flags &
		(HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
	expr->fn = hist_field_unary_minus;
	/* expr owns operand1 from here; destroy_hist_field(expr) frees it. */
	expr->operands[0] = operand1;
	expr->operator = FIELD_OP_UNARY_MINUS;
	expr->name = expr_str(expr, 0);
	expr->type = kstrdup(operand1->type, GFP_KERNEL);
	if (!expr->type) {
		ret = -ENOMEM;
		goto free;
	}

	return expr;
 free:
	destroy_hist_field(expr, 0);
	return ERR_PTR(ret);
}
+
+static int check_expr_operands(struct hist_field *operand1,
+                              struct hist_field *operand2)
+{
+       unsigned long operand1_flags = operand1->flags;
+       unsigned long operand2_flags = operand2->flags;
+
+       if ((operand1_flags & HIST_FIELD_FL_VAR_REF) ||
+           (operand1_flags & HIST_FIELD_FL_ALIAS)) {
+               struct hist_field *var;
+
+               var = find_var_field(operand1->var.hist_data, operand1->name);
+               if (!var)
+                       return -EINVAL;
+               operand1_flags = var->flags;
+       }
+
+       if ((operand2_flags & HIST_FIELD_FL_VAR_REF) ||
+           (operand2_flags & HIST_FIELD_FL_ALIAS)) {
+               struct hist_field *var;
+
+               var = find_var_field(operand2->var.hist_data, operand2->name);
+               if (!var)
+                       return -EINVAL;
+               operand2_flags = var->flags;
+       }
+
+       if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) !=
+           (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) {
+               hist_err("Timestamp units in expression don't match", NULL);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
+                                    struct trace_event_file *file,
+                                    char *str, unsigned long flags,
+                                    char *var_name, unsigned int level)
+{
+       struct hist_field *operand1 = NULL, *operand2 = NULL, *expr = NULL;
+       unsigned long operand_flags;
+       int field_op, ret = -EINVAL;
+       char *sep, *operand1_str;
+
+       if (level > 3) {
+               hist_err("Too many subexpressions (3 max): ", str);
+               return ERR_PTR(-EINVAL);
+       }
+
+       field_op = contains_operator(str);
+
+       if (field_op == FIELD_OP_NONE)
+               return parse_atom(hist_data, file, str, &flags, var_name);
+
+       if (field_op == FIELD_OP_UNARY_MINUS)
+               return parse_unary(hist_data, file, str, flags, var_name, ++level);
+
+       switch (field_op) {
+       case FIELD_OP_MINUS:
+               sep = "-";
+               break;
+       case FIELD_OP_PLUS:
+               sep = "+";
+               break;
+       default:
+               goto free;
+       }
+
+       operand1_str = strsep(&str, sep);
+       if (!operand1_str || !str)
+               goto free;
+
+       operand_flags = 0;
+       operand1 = parse_atom(hist_data, file, operand1_str,
+                             &operand_flags, NULL);
+       if (IS_ERR(operand1)) {
+               ret = PTR_ERR(operand1);
+               operand1 = NULL;
+               goto free;
+       }
+
+       /* rest of string could be another expression e.g. b+c in a+b+c */
+       operand_flags = 0;
+       operand2 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level);
+       if (IS_ERR(operand2)) {
+               ret = PTR_ERR(operand2);
+               operand2 = NULL;
+               goto free;
+       }
+
+       ret = check_expr_operands(operand1, operand2);
+       if (ret)
+               goto free;
+
+       flags |= HIST_FIELD_FL_EXPR;
+
+       flags |= operand1->flags &
+               (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
+
+       expr = create_hist_field(hist_data, NULL, flags, var_name);
+       if (!expr) {
+               ret = -ENOMEM;
+               goto free;
+       }
+
+       operand1->read_once = true;
+       operand2->read_once = true;
+
+       expr->operands[0] = operand1;
+       expr->operands[1] = operand2;
+       expr->operator = field_op;
+       expr->name = expr_str(expr, 0);
+       expr->type = kstrdup(operand1->type, GFP_KERNEL);
+       if (!expr->type) {
+               ret = -ENOMEM;
+               goto free;
+       }
+
+       switch (field_op) {
+       case FIELD_OP_MINUS:
+               expr->fn = hist_field_minus;
+               break;
+       case FIELD_OP_PLUS:
+               expr->fn = hist_field_plus;
+               break;
+       default:
+               ret = -EINVAL;
+               goto free;
+       }
+
+       return expr;
+ free:
+       destroy_hist_field(operand1, 0);
+       destroy_hist_field(operand2, 0);
+       destroy_hist_field(expr, 0);
+
+       return ERR_PTR(ret);
+}
+
+static char *find_trigger_filter(struct hist_trigger_data *hist_data,
+                                struct trace_event_file *file)
+{
+       struct event_trigger_data *test;
+
+       list_for_each_entry_rcu(test, &file->triggers, list) {
+               if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+                       if (test->private_data == hist_data)
+                               return test->filter_str;
+               }
+       }
+
+       return NULL;
+}
+
+static struct event_command trigger_hist_cmd;
+static int event_hist_trigger_func(struct event_command *cmd_ops,
+                                  struct trace_event_file *file,
+                                  char *glob, char *cmd, char *param);
+
+static bool compatible_keys(struct hist_trigger_data *target_hist_data,
+                           struct hist_trigger_data *hist_data,
+                           unsigned int n_keys)
+{
+       struct hist_field *target_hist_field, *hist_field;
+       unsigned int n, i, j;
+
+       if (hist_data->n_fields - hist_data->n_vals != n_keys)
+               return false;
+
+       i = hist_data->n_vals;
+       j = target_hist_data->n_vals;
+
+       for (n = 0; n < n_keys; n++) {
+               hist_field = hist_data->fields[i + n];
+               target_hist_field = target_hist_data->fields[j + n];
+
+               if (strcmp(hist_field->type, target_hist_field->type) != 0)
+                       return false;
+               if (hist_field->size != target_hist_field->size)
+                       return false;
+               if (hist_field->is_signed != target_hist_field->is_signed)
+                       return false;
+       }
+
+       return true;
+}
+
+static struct hist_trigger_data *
+find_compatible_hist(struct hist_trigger_data *target_hist_data,
+                    struct trace_event_file *file)
+{
+       struct hist_trigger_data *hist_data;
+       struct event_trigger_data *test;
+       unsigned int n_keys;
+
+       n_keys = target_hist_data->n_fields - target_hist_data->n_vals;
+
+       list_for_each_entry_rcu(test, &file->triggers, list) {
+               if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+                       hist_data = test->private_data;
+
+                       if (compatible_keys(target_hist_data, hist_data, n_keys))
+                               return hist_data;
+               }
+       }
+
+       return NULL;
+}
+
+static struct trace_event_file *event_file(struct trace_array *tr,
+                                          char *system, char *event_name)
+{
+       struct trace_event_file *file;
+
+       file = find_event_file(tr, system, event_name);
+       if (!file)
+               return ERR_PTR(-EINVAL);
+
+       return file;
+}
+
+static struct hist_field *
+find_synthetic_field_var(struct hist_trigger_data *target_hist_data,
+                        char *system, char *event_name, char *field_name)
+{
+       struct hist_field *event_var;
+       char *synthetic_name;
+
+       synthetic_name = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL);
+       if (!synthetic_name)
+               return ERR_PTR(-ENOMEM);
+
+       strcpy(synthetic_name, "synthetic_");
+       strcat(synthetic_name, field_name);
+
+       event_var = find_event_var(target_hist_data, system, event_name, synthetic_name);
+
+       kfree(synthetic_name);
+
+       return event_var;
+}
+
/**
 * create_field_var_hist - Automatically create a histogram and var for a field
 * @target_hist_data: The target hist trigger
 * @subsys_name: Optional subsystem name
 * @event_name: Optional event name
 * @field_name: The name of the field (and the resulting variable)
 *
 * Hist trigger actions fetch data from variables, not directly from
 * events.  However, for convenience, users are allowed to directly
 * specify an event field in an action, which will be automatically
 * converted into a variable on their behalf.
 *
 * If a user specifies a field on an event that isn't the event the
 * histogram currently being defined (the target event histogram), the
 * only way that can be accomplished is if a new hist trigger is
 * created and the field variable defined on that.
 *
 * This function creates a new histogram compatible with the target
 * event (meaning a histogram with the same key as the target
 * histogram), and creates a variable for the specified field, but
 * with 'synthetic_' prepended to the variable name in order to avoid
 * collision with normal field variables.
 *
 * Return: The variable created for the field.
 */
static struct hist_field *
create_field_var_hist(struct hist_trigger_data *target_hist_data,
		      char *subsys_name, char *event_name, char *field_name)
{
	struct trace_array *tr = target_hist_data->event_file->tr;
	struct hist_field *event_var = ERR_PTR(-EINVAL);
	struct hist_trigger_data *hist_data;
	unsigned int i, n, first = true;	/* NOTE(review): 'first' is used as a bool */
	struct field_var_hist *var_hist;
	struct trace_event_file *file;
	struct hist_field *key_field;
	char *saved_filter;
	char *cmd;
	int ret;

	if (target_hist_data->n_field_var_hists >= SYNTH_FIELDS_MAX) {
		hist_err_event("onmatch: Too many field variables defined: ",
			       subsys_name, event_name, field_name);
		return ERR_PTR(-EINVAL);
	}

	file = event_file(tr, subsys_name, event_name);

	if (IS_ERR(file)) {
		hist_err_event("onmatch: Event file not found: ",
			       subsys_name, event_name, field_name);
		ret = PTR_ERR(file);
		return ERR_PTR(ret);
	}

	/*
	 * Look for a histogram compatible with target.  We'll use the
	 * found histogram specification to create a new matching
	 * histogram with our variable on it.  target_hist_data is not
	 * yet a registered histogram so we can't use that.
	 */
	hist_data = find_compatible_hist(target_hist_data, file);
	if (!hist_data) {
		hist_err_event("onmatch: Matching event histogram not found: ",
			       subsys_name, event_name, field_name);
		return ERR_PTR(-EINVAL);
	}

	/* See if a synthetic field variable has already been created */
	event_var = find_synthetic_field_var(target_hist_data, subsys_name,
					     event_name, field_name);
	if (!IS_ERR_OR_NULL(event_var))
		return event_var;

	var_hist = kzalloc(sizeof(*var_hist), GFP_KERNEL);
	if (!var_hist)
		return ERR_PTR(-ENOMEM);

	/* Buffer in which the new trigger command string is composed. */
	cmd = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL);
	if (!cmd) {
		kfree(var_hist);
		return ERR_PTR(-ENOMEM);
	}

	/* Use the same keys as the compatible histogram */
	strcat(cmd, "keys=");

	for_each_hist_key_field(i, hist_data) {
		key_field = hist_data->fields[i];
		if (!first)
			strcat(cmd, ",");
		strcat(cmd, key_field->field->name);
		first = false;
	}

	/* Create the synthetic field variable specification */
	strcat(cmd, ":synthetic_");
	strcat(cmd, field_name);
	strcat(cmd, "=");
	strcat(cmd, field_name);

	/* Use the same filter as the compatible histogram */
	saved_filter = find_trigger_filter(hist_data, file);
	if (saved_filter) {
		strcat(cmd, " if ");
		strcat(cmd, saved_filter);
	}

	var_hist->cmd = kstrdup(cmd, GFP_KERNEL);
	if (!var_hist->cmd) {
		kfree(cmd);
		kfree(var_hist);
		return ERR_PTR(-ENOMEM);
	}

	/* Save the compatible histogram information */
	var_hist->hist_data = hist_data;

	/* Create the new histogram with our variable */
	ret = event_hist_trigger_func(&trigger_hist_cmd, file,
				      "", "hist", cmd);
	if (ret) {
		kfree(cmd);
		kfree(var_hist->cmd);
		kfree(var_hist);
		hist_err_event("onmatch: Couldn't create histogram for field: ",
			       subsys_name, event_name, field_name);
		return ERR_PTR(ret);
	}

	kfree(cmd);

	/* If we can't find the variable, something went wrong */
	event_var = find_synthetic_field_var(target_hist_data, subsys_name,
					     event_name, field_name);
	if (IS_ERR_OR_NULL(event_var)) {
		kfree(var_hist->cmd);
		kfree(var_hist);
		hist_err_event("onmatch: Couldn't find synthetic variable: ",
			       subsys_name, event_name, field_name);
		return ERR_PTR(-EINVAL);
	}

	n = target_hist_data->n_field_var_hists;
	target_hist_data->field_var_hists[n] = var_hist;
	target_hist_data->n_field_var_hists++;

	return event_var;
}
+
+/*
+ * Look up a variable field defined on the current (target) hist
+ * trigger's own event.  If subsys_name/event_name are supplied they
+ * must match the trigger's event exactly, otherwise NULL is returned;
+ * a bare var_name always searches the local trigger.
+ */
+static struct hist_field *
+find_target_event_var(struct hist_trigger_data *hist_data,
+                     char *subsys_name, char *event_name, char *var_name)
+{
+       struct trace_event_file *file = hist_data->event_file;
+       struct hist_field *hist_field = NULL;
+
+       if (subsys_name) {
+               struct trace_event_call *call;
+
+               /* A subsystem without an event name can't match anything */
+               if (!event_name)
+                       return NULL;
+
+               call = file->event_call;
+
+               if (strcmp(subsys_name, call->class->system) != 0)
+                       return NULL;
+
+               if (strcmp(event_name, trace_event_name(call)) != 0)
+                       return NULL;
+       }
+
+       hist_field = find_var_field(hist_data, var_name);
+
+       return hist_field;
+}
+
+/*
+ * Evaluate each field variable's value for the current event record
+ * and store it into the corresponding tracing_map variable slot of
+ * this map element.  String values are copied into the element's
+ * preallocated field_var_str buffers (starting at index
+ * field_var_str_start) so the saved pointer outlives the record.
+ */
+static inline void __update_field_vars(struct tracing_map_elt *elt,
+                                      struct ring_buffer_event *rbe,
+                                      void *rec,
+                                      struct field_var **field_vars,
+                                      unsigned int n_field_vars,
+                                      unsigned int field_var_str_start)
+{
+       struct hist_elt_data *elt_data = elt->private_data;
+       unsigned int i, j, var_idx;
+       u64 var_val;
+
+       for (i = 0, j = field_var_str_start; i < n_field_vars; i++) {
+               struct field_var *field_var = field_vars[i];
+               struct hist_field *var = field_var->var;
+               struct hist_field *val = field_var->val;
+
+               var_val = val->fn(val, elt, rbe, rec);
+               var_idx = var->var.idx;
+
+               if (val->flags & HIST_FIELD_FL_STRING) {
+                       /* j only advances for string fields */
+                       char *str = elt_data->field_var_str[j++];
+                       char *val_str = (char *)(uintptr_t)var_val;
+
+                       strscpy(str, val_str, STR_VAR_LEN_MAX);
+                       var_val = (u64)(uintptr_t)str;
+               }
+               tracing_map_set_var(elt, var_idx, var_val);
+       }
+}
+
+/* Update the trigger's ordinary field variables; str bufs start at 0 */
+static void update_field_vars(struct hist_trigger_data *hist_data,
+                             struct tracing_map_elt *elt,
+                             struct ring_buffer_event *rbe,
+                             void *rec)
+{
+       __update_field_vars(elt, rbe, rec, hist_data->field_vars,
+                           hist_data->n_field_vars, 0);
+}
+
+/*
+ * Update the onmax(...).save() variables; their string buffers follow
+ * the ordinary field-var strings, hence the n_field_var_str offset.
+ */
+static void update_max_vars(struct hist_trigger_data *hist_data,
+                           struct tracing_map_elt *elt,
+                           struct ring_buffer_event *rbe,
+                           void *rec)
+{
+       __update_field_vars(elt, rbe, rec, hist_data->max_vars,
+                           hist_data->n_max_vars, hist_data->n_field_var_str);
+}
+
+/*
+ * Allocate a new named variable hist_field and reserve a tracing_map
+ * variable slot for it.  Redefining an existing variable is an error
+ * unless the trigger is being removed.  Returns the new field or an
+ * ERR_PTR; never NULL.
+ */
+static struct hist_field *create_var(struct hist_trigger_data *hist_data,
+                                    struct trace_event_file *file,
+                                    char *name, int size, const char *type)
+{
+       struct hist_field *var;
+       int idx;
+
+       if (find_var(hist_data, file, name) && !hist_data->remove) {
+               var = ERR_PTR(-EINVAL);
+               goto out;
+       }
+
+       var = kzalloc(sizeof(struct hist_field), GFP_KERNEL);
+       if (!var) {
+               var = ERR_PTR(-ENOMEM);
+               goto out;
+       }
+
+       idx = tracing_map_add_var(hist_data->map);
+       if (idx < 0) {
+               kfree(var);
+               var = ERR_PTR(-EINVAL);
+               goto out;
+       }
+
+       var->flags = HIST_FIELD_FL_VAR;
+       var->var.idx = idx;
+       var->var.hist_data = var->hist_data = hist_data;
+       var->size = size;
+       var->var.name = kstrdup(name, GFP_KERNEL);
+       var->type = kstrdup(type, GFP_KERNEL);
+       if (!var->var.name || !var->type) {
+               /* kfree(NULL) is a no-op, so partial dups are safe here */
+               kfree(var->var.name);
+               kfree(var->type);
+               kfree(var);
+               var = ERR_PTR(-ENOMEM);
+       }
+ out:
+       return var;
+}
+
+/*
+ * Create a field_var pair for field_name on the given event: a 'val'
+ * hist_field that reads the event field, and a 'var' hist_field that
+ * stores it.  Returns the pair or an ERR_PTR.
+ */
+static struct field_var *create_field_var(struct hist_trigger_data *hist_data,
+                                         struct trace_event_file *file,
+                                         char *field_name)
+{
+       struct hist_field *val = NULL, *var = NULL;
+       unsigned long flags = HIST_FIELD_FL_VAR;
+       struct field_var *field_var;
+       int ret = 0;
+
+       if (hist_data->n_field_vars >= SYNTH_FIELDS_MAX) {
+               hist_err("Too many field variables defined: ", field_name);
+               ret = -EINVAL;
+               goto err;
+       }
+
+       val = parse_atom(hist_data, file, field_name, &flags, NULL);
+       if (IS_ERR(val)) {
+               hist_err("Couldn't parse field variable: ", field_name);
+               ret = PTR_ERR(val);
+               goto err;
+       }
+
+       var = create_var(hist_data, file, field_name, val->size, val->type);
+       if (IS_ERR(var)) {
+               hist_err("Couldn't create or find variable: ", field_name);
+               /*
+                * NOTE(review): bare kfree() here and below looks like it
+                * leaks the kstrdup'd name/type inside the hist_fields —
+                * destroy_hist_field() would free those too; confirm.
+                */
+               kfree(val);
+               ret = PTR_ERR(var);
+               goto err;
+       }
+
+       field_var = kzalloc(sizeof(struct field_var), GFP_KERNEL);
+       if (!field_var) {
+               kfree(val);
+               kfree(var);
+               ret =  -ENOMEM;
+               goto err;
+       }
+
+       field_var->var = var;
+       field_var->val = val;
+ out:
+       return field_var;
+ err:
+       field_var = ERR_PTR(ret);
+       goto out;
+}
+
+/**
+ * create_target_field_var - Automatically create a variable for a field
+ * @target_hist_data: The target hist trigger
+ * @subsys_name: Optional subsystem name
+ * @event_name: Optional event name
+ * @var_name: The name of the field (and the resulting variable)
+ *
+ * Hist trigger actions fetch data from variables, not directly from
+ * events.  However, for convenience, users are allowed to directly
+ * specify an event field in an action, which will be automatically
+ * converted into a variable on their behalf.
+ *
+ * This function creates a field variable with the name var_name on
+ * the hist trigger currently being defined on the target event.  If
+ * subsys_name and event_name are specified, this function simply
+ * verifies that they do in fact match the target event subsystem and
+ * event name.
+ *
+ * Return: The variable created for the field.
+ */
+static struct field_var *
+create_target_field_var(struct hist_trigger_data *target_hist_data,
+                       char *subsys_name, char *event_name, char *var_name)
+{
+       struct trace_event_file *file = target_hist_data->event_file;
+
+       /* If qualified, the names must match the target event itself */
+       if (subsys_name) {
+               struct trace_event_call *call;
+
+               if (!event_name)
+                       return NULL;
+
+               call = file->event_call;
+
+               if (strcmp(subsys_name, call->class->system) != 0)
+                       return NULL;
+
+               if (strcmp(event_name, trace_event_name(call)) != 0)
+                       return NULL;
+       }
+
+       return create_field_var(target_hist_data, file, var_name);
+}
+
+/*
+ * Print the saved max value and each onmax().save() variable for one
+ * histogram entry when the hist file is read.
+ */
+static void onmax_print(struct seq_file *m,
+                       struct hist_trigger_data *hist_data,
+                       struct tracing_map_elt *elt,
+                       struct action_data *data)
+{
+       unsigned int i, save_var_idx, max_idx = data->onmax.max_var->var.idx;
+
+       seq_printf(m, "\n\tmax: %10llu", tracing_map_read_var(elt, max_idx));
+
+       for (i = 0; i < hist_data->n_max_vars; i++) {
+               struct hist_field *save_val = hist_data->max_vars[i]->val;
+               struct hist_field *save_var = hist_data->max_vars[i]->var;
+               u64 val;
+
+               save_var_idx = save_var->var.idx;
+
+               val = tracing_map_read_var(elt, save_var_idx);
+
+               if (save_val->flags & HIST_FIELD_FL_STRING) {
+                       /* string vars hold a pointer into field_var_str */
+                       seq_printf(m, "  %s: %-32s", save_var->var.name,
+                                  (char *)(uintptr_t)(val));
+               } else
+                       seq_printf(m, "  %s: %10llu", save_var->var.name, val);
+       }
+}
+
+/*
+ * onmax action handler: if the tracked variable exceeds the current
+ * max for this entry, record the new max and snapshot the save() vars.
+ */
+static void onmax_save(struct hist_trigger_data *hist_data,
+                      struct tracing_map_elt *elt, void *rec,
+                      struct ring_buffer_event *rbe,
+                      struct action_data *data, u64 *var_ref_vals)
+{
+       unsigned int max_idx = data->onmax.max_var->var.idx;
+       unsigned int max_var_ref_idx = data->onmax.max_var_ref_idx;
+
+       u64 var_val, max_val;
+
+       var_val = var_ref_vals[max_var_ref_idx];
+       max_val = tracing_map_read_var(elt, max_idx);
+
+       if (var_val <= max_val)
+               return;
+
+       tracing_map_set_var(elt, max_idx, var_val);
+
+       update_max_vars(hist_data, elt, rbe, rec);
+}
+
+/* Free an onmax action_data and everything it owns */
+static void onmax_destroy(struct action_data *data)
+{
+       unsigned int i;
+
+       destroy_hist_field(data->onmax.max_var, 0);
+       destroy_hist_field(data->onmax.var, 0);
+
+       kfree(data->onmax.var_str);
+       kfree(data->onmax.fn_name);
+
+       for (i = 0; i < data->n_params; i++)
+               kfree(data->params[i]);
+
+       kfree(data);
+}
+
+/*
+ * Wire up a parsed onmax($var).save(...) action on this trigger:
+ * resolve the tracked variable, create a reference to it, create the
+ * hidden "max" variable, and create a field variable for every
+ * save() parameter.  Returns 0 or a negative error.
+ */
+static int onmax_create(struct hist_trigger_data *hist_data,
+                       struct action_data *data)
+{
+       struct trace_event_file *file = hist_data->event_file;
+       struct hist_field *var_field, *ref_field, *max_var;
+       unsigned int var_ref_idx = hist_data->n_var_refs;
+       struct field_var *field_var;
+       char *onmax_var_str, *param;
+       unsigned long flags;
+       unsigned int i;
+       int ret = 0;
+
+       onmax_var_str = data->onmax.var_str;
+       if (onmax_var_str[0] != '$') {
+               hist_err("onmax: For onmax(x), x must be a variable: ", onmax_var_str);
+               return -EINVAL;
+       }
+       onmax_var_str++;
+
+       var_field = find_target_event_var(hist_data, NULL, NULL, onmax_var_str);
+       if (!var_field) {
+               hist_err("onmax: Couldn't find onmax variable: ", onmax_var_str);
+               return -EINVAL;
+       }
+
+       flags = HIST_FIELD_FL_VAR_REF;
+       ref_field = create_hist_field(hist_data, NULL, flags, NULL);
+       if (!ref_field)
+               return -ENOMEM;
+
+       if (init_var_ref(ref_field, var_field, NULL, NULL)) {
+               destroy_hist_field(ref_field, 0);
+               ret = -ENOMEM;
+               goto out;
+       }
+       hist_data->var_refs[hist_data->n_var_refs] = ref_field;
+       ref_field->var_ref_idx = hist_data->n_var_refs++;
+       data->onmax.var = ref_field;
+
+       data->fn = onmax_save;
+       /* index of the tracked variable's value in var_ref_vals[] */
+       data->onmax.max_var_ref_idx = var_ref_idx;
+       max_var = create_var(hist_data, file, "max", sizeof(u64), "u64");
+       if (IS_ERR(max_var)) {
+               hist_err("onmax: Couldn't create onmax variable: ", "max");
+               ret = PTR_ERR(max_var);
                goto out;
        }
+       data->onmax.max_var = max_var;
+
+       for (i = 0; i < data->n_params; i++) {
+               param = kstrdup(data->params[i], GFP_KERNEL);
+               if (!param) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+
+               field_var = create_target_field_var(hist_data, NULL, NULL, param);
+               if (IS_ERR(field_var)) {
+                       hist_err("onmax: Couldn't create field variable: ", param);
+                       ret = PTR_ERR(field_var);
+                       kfree(param);
+                       goto out;
+               }
+
+               hist_data->max_vars[hist_data->n_max_vars++] = field_var;
+               if (field_var->val->flags & HIST_FIELD_FL_STRING)
+                       hist_data->n_max_var_str++;
+
+               kfree(param);
+       }
+ out:
+       return ret;
+}
+
+/*
+ * Split a comma-separated action parameter list and kstrdup each
+ * param into data->params[].  Returns 0 on success or a negative
+ * error; already-saved params are freed by the action's destroy path.
+ */
+static int parse_action_params(char *params, struct action_data *data)
+{
+       char *param, *saved_param;
+       int ret = 0;
+
+       while (params) {
+               if (data->n_params >= SYNTH_FIELDS_MAX) {
+                       /*
+                        * Previously this silently dropped the extra
+                        * params and returned success; fail explicitly
+                        * so the user learns the list is too long.
+                        */
+                       hist_err("Too many action params: ", params);
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               param = strsep(&params, ",");
+               if (!param) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               param = strstrip(param);
+               if (strlen(param) < 2) {
+                       hist_err("Invalid action param: ", param);
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               saved_param = kstrdup(param, GFP_KERNEL);
+               if (!saved_param) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+
+               data->params[data->n_params++] = saved_param;
+       }
+ out:
+       return ret;
+}
+
+/*
+ * Parse an "onmax(var).save(params)" action string into an
+ * action_data.  Only the "save" handler is accepted.  Returns the
+ * action_data or an ERR_PTR; on failure everything allocated so far
+ * is released via onmax_destroy().
+ */
+static struct action_data *onmax_parse(char *str)
+{
+       char *onmax_fn_name, *onmax_var_str;
+       struct action_data *data;
+       int ret = -EINVAL;
+
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return ERR_PTR(-ENOMEM);
+
+       onmax_var_str = strsep(&str, ")");
+       if (!onmax_var_str || !str) {
+               ret = -EINVAL;
+               goto free;
+       }
+
+       data->onmax.var_str = kstrdup(onmax_var_str, GFP_KERNEL);
+       if (!data->onmax.var_str) {
+               ret = -ENOMEM;
+               goto free;
+       }
+
+       /* skip the '.' between onmax() and the handler name */
+       strsep(&str, ".");
+       if (!str)
+               goto free;
+
+       onmax_fn_name = strsep(&str, "(");
+       if (!onmax_fn_name || !str)
+               goto free;
+
+       if (strncmp(onmax_fn_name, "save", strlen("save")) == 0) {
+               char *params = strsep(&str, ")");
+
+               if (!params) {
+                       ret = -EINVAL;
+                       goto free;
+               }
+
+               ret = parse_action_params(params, data);
+               if (ret)
+                       goto free;
+       } else
+               goto free;
+
+       data->onmax.fn_name = kstrdup(onmax_fn_name, GFP_KERNEL);
+       if (!data->onmax.fn_name) {
+               ret = -ENOMEM;
+               goto free;
+       }
+ out:
+       return data;
+ free:
+       onmax_destroy(data);
+       data = ERR_PTR(ret);
+       goto out;
+}
+
+/*
+ * Free an onmatch action_data; synth_event_mutex serializes the
+ * synthetic event refcount drop against event removal.
+ */
+static void onmatch_destroy(struct action_data *data)
+{
+       unsigned int i;
+
+       mutex_lock(&synth_event_mutex);
+
+       kfree(data->onmatch.match_event);
+       kfree(data->onmatch.match_event_system);
+       kfree(data->onmatch.synth_event_name);
+
+       for (i = 0; i < data->n_params; i++)
+               kfree(data->params[i]);
+
+       if (data->onmatch.synth_event)
+               data->onmatch.synth_event->ref--;
+
+       kfree(data);
+
+       mutex_unlock(&synth_event_mutex);
+}
+
+/* Free one field_var pair; tolerates NULL */
+static void destroy_field_var(struct field_var *field_var)
+{
+       if (!field_var)
+               return;
+
+       destroy_hist_field(field_var->var, 0);
+       destroy_hist_field(field_var->val, 0);
+
+       kfree(field_var);
+}
+
+/* Free all field variables owned by this trigger */
+static void destroy_field_vars(struct hist_trigger_data *hist_data)
+{
+       unsigned int i;
+
+       for (i = 0; i < hist_data->n_field_vars; i++)
+               destroy_field_var(hist_data->field_vars[i]);
+}
+
+/* Record a field_var on the trigger; string vars need a str buffer */
+static void save_field_var(struct hist_trigger_data *hist_data,
+                          struct field_var *field_var)
+{
+       hist_data->field_vars[hist_data->n_field_vars++] = field_var;
+
+       if (field_var->val->flags & HIST_FIELD_FL_STRING)
+               hist_data->n_field_var_str++;
+}
+
+
+/* Free the variable references used as synthetic event params */
+static void destroy_synth_var_refs(struct hist_trigger_data *hist_data)
+{
+       unsigned int i;
+
+       for (i = 0; i < hist_data->n_synth_var_refs; i++)
+               destroy_hist_field(hist_data->synth_var_refs[i], 0);
+}
+
+/*
+ * Track a synthetic-event param var ref both in the synth list and in
+ * the trigger's general var_refs[] so it gets resolved per event.
+ */
+static void save_synth_var_ref(struct hist_trigger_data *hist_data,
+                        struct hist_field *var_ref)
+{
+       hist_data->synth_var_refs[hist_data->n_synth_var_refs++] = var_ref;
+
+       hist_data->var_refs[hist_data->n_var_refs] = var_ref;
+       var_ref->var_ref_idx = hist_data->n_var_refs++;
+}
+
+/*
+ * Verify that the hist_field supplied for synthetic event param
+ * field_pos exists and has an exactly matching type string.
+ * Returns 0 on match, -EINVAL otherwise.
+ */
+static int check_synth_field(struct synth_event *event,
+                            struct hist_field *hist_field,
+                            unsigned int field_pos)
+{
+       struct synth_field *field;
+
+       if (field_pos >= event->n_fields)
+               return -EINVAL;
+
+       field = event->fields[field_pos];
+
+       if (strcmp(field->type, hist_field->type) != 0)
+               return -EINVAL;
+
+       return 0;
+}
+
+/*
+ * Resolve an onmatch "$var" param: first look on the target trigger
+ * itself, then fall back to the onmatch() match event (or the given
+ * system.event if qualified).  Returns the variable or NULL.
+ */
+static struct hist_field *
+onmatch_find_var(struct hist_trigger_data *hist_data, struct action_data *data,
+                char *system, char *event, char *var)
+{
+       struct hist_field *hist_field;
+
+       var++; /* skip '$' */
+
+       hist_field = find_target_event_var(hist_data, system, event, var);
+       if (!hist_field) {
+               if (!system) {
+                       system = data->onmatch.match_event_system;
+                       event = data->onmatch.match_event;
+               }
+
+               hist_field = find_event_var(hist_data, system, event, var);
+       }
+
+       if (!hist_field)
+               hist_err_event("onmatch: Couldn't find onmatch param: $", system, event, var);
+
+       return hist_field;
+}
+
+/*
+ * Turn a bare-field onmatch param into a variable.  Tries the target
+ * event first; otherwise spins up a matching hist trigger on the
+ * other event to host the variable.  Returns the variable or NULL.
+ */
+static struct hist_field *
+onmatch_create_field_var(struct hist_trigger_data *hist_data,
+                        struct action_data *data, char *system,
+                        char *event, char *var)
+{
+       struct hist_field *hist_field = NULL;
+       struct field_var *field_var;
+
+       /*
+        * First try to create a field var on the target event (the
+        * currently being defined).  This will create a variable for
+        * unqualified fields on the target event, or if qualified,
+        * target fields that have qualified names matching the target.
+        */
+       field_var = create_target_field_var(hist_data, system, event, var);
+
+       if (field_var && !IS_ERR(field_var)) {
+               save_field_var(hist_data, field_var);
+               hist_field = field_var->var;
+       } else {
+               field_var = NULL;
+               /*
+                * If no explicit system.event is specified, default to
+                * looking for fields on the onmatch(system.event.xxx)
+                * event.
+                */
+               if (!system) {
+                       system = data->onmatch.match_event_system;
+                       event = data->onmatch.match_event;
+               }
+
+               /*
+                * At this point, we're looking at a field on another
+                * event.  Because we can't modify a hist trigger on
+                * another event to add a variable for a field, we need
+                * to create a new trigger on that event and create the
+                * variable at the same time.
+                */
+               hist_field = create_field_var_hist(hist_data, system, event, var);
+               if (IS_ERR(hist_field))
+                       goto free;
+       }
+ out:
+       return hist_field;
+ free:
+       /* field_var is NULL on this path; destroy_field_var handles it */
+       destroy_field_var(field_var);
+       hist_field = NULL;
+       goto out;
+}
+
+/*
+ * Bind a parsed onmatch() action to its synthetic event: take a ref
+ * on the synth event, resolve or create a variable for every param,
+ * type-check each against the synth event's fields, and record the
+ * var-ref base index used when the action fires.  Returns 0 or a
+ * negative error (the synth event ref is dropped on failure).
+ */
+static int onmatch_create(struct hist_trigger_data *hist_data,
+                         struct trace_event_file *file,
+                         struct action_data *data)
+{
+       char *event_name, *param, *system = NULL;
+       struct hist_field *hist_field, *var_ref;
+       unsigned int i, var_ref_idx;
+       unsigned int field_pos = 0;
+       struct synth_event *event;
+       int ret = 0;
+
+       mutex_lock(&synth_event_mutex);
+       event = find_synth_event(data->onmatch.synth_event_name);
+       if (!event) {
+               hist_err("onmatch: Couldn't find synthetic event: ", data->onmatch.synth_event_name);
+               mutex_unlock(&synth_event_mutex);
+               return -EINVAL;
+       }
+       /* pin the synth event while this action references it */
+       event->ref++;
+       mutex_unlock(&synth_event_mutex);
+
+       var_ref_idx = hist_data->n_var_refs;
+
+       for (i = 0; i < data->n_params; i++) {
+               char *p;
+
+               p = param = kstrdup(data->params[i], GFP_KERNEL);
+               if (!param) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+
+               /* param forms: field, system.event.field, $var, ... */
+               system = strsep(&param, ".");
+               if (!param) {
+                       param = (char *)system;
+                       system = event_name = NULL;
+               } else {
+                       event_name = strsep(&param, ".");
+                       if (!param) {
+                               kfree(p);
+                               ret = -EINVAL;
+                               goto err;
+                       }
+               }
+
+               if (param[0] == '$')
+                       hist_field = onmatch_find_var(hist_data, data, system,
+                                                     event_name, param);
+               else
+                       hist_field = onmatch_create_field_var(hist_data, data,
+                                                             system,
+                                                             event_name,
+                                                             param);
+
+               if (!hist_field) {
+                       kfree(p);
+                       ret = -EINVAL;
+                       goto err;
+               }
+
+               if (check_synth_field(event, hist_field, field_pos) == 0) {
+                       var_ref = create_var_ref(hist_field, system, event_name);
+                       if (!var_ref) {
+                               kfree(p);
+                               ret = -ENOMEM;
+                               goto err;
+                       }
+
+                       save_synth_var_ref(hist_data, var_ref);
+                       field_pos++;
+                       kfree(p);
+                       continue;
+               }
+
+               hist_err_event("onmatch: Param type doesn't match synthetic event field type: ",
+                              system, event_name, param);
+               kfree(p);
+               ret = -EINVAL;
+               goto err;
+       }
+
+       if (field_pos != event->n_fields) {
+               hist_err("onmatch: Param count doesn't match synthetic event field count: ", event->name);
+               ret = -EINVAL;
+               goto err;
+       }
+
+       data->fn = action_trace;
+       data->onmatch.synth_event = event;
+       data->onmatch.var_ref_idx = var_ref_idx;
+ out:
+       return ret;
+ err:
+       /* undo the ref taken above; onmatch_destroy won't see the event */
+       mutex_lock(&synth_event_mutex);
+       event->ref--;
+       mutex_unlock(&synth_event_mutex);
+
+       goto out;
+}
+
+static struct action_data *onmatch_parse(struct trace_array *tr, char *str)
+{
+       char *match_event, *match_event_system;
+       char *synth_event_name, *params;
+       struct action_data *data;
+       int ret = -EINVAL;
+
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return ERR_PTR(-ENOMEM);
+
+       match_event = strsep(&str, ")");
+       if (!match_event || !str) {
+               hist_err("onmatch: Missing closing paren: ", match_event);
+               goto free;
+       }
+
+       match_event_system = strsep(&match_event, ".");
+       if (!match_event) {
+               hist_err("onmatch: Missing subsystem for match event: ", match_event_system);
+               goto free;
+       }
+
+       if (IS_ERR(event_file(tr, match_event_system, match_event))) {
+               hist_err_event("onmatch: Invalid subsystem or event name: ",
+                              match_event_system, match_event, NULL);
+               goto free;
+       }
 
-       if (WARN_ON_ONCE(!field))
-               goto out;
+       data->onmatch.match_event = kstrdup(match_event, GFP_KERNEL);
+       if (!data->onmatch.match_event) {
+               ret = -ENOMEM;
+               goto free;
+       }
 
-       if (is_string_field(field)) {
-               flags |= HIST_FIELD_FL_STRING;
+       data->onmatch.match_event_system = kstrdup(match_event_system, GFP_KERNEL);
+       if (!data->onmatch.match_event_system) {
+               ret = -ENOMEM;
+               goto free;
+       }
 
-               if (field->filter_type == FILTER_STATIC_STRING)
-                       hist_field->fn = hist_field_string;
-               else if (field->filter_type == FILTER_DYN_STRING)
-                       hist_field->fn = hist_field_dynstring;
-               else
-                       hist_field->fn = hist_field_pstring;
-       } else {
-               hist_field->fn = select_value_fn(field->size,
-                                                field->is_signed);
-               if (!hist_field->fn) {
-                       destroy_hist_field(hist_field, 0);
-                       return NULL;
-               }
+       strsep(&str, ".");
+       if (!str) {
+               hist_err("onmatch: Missing . after onmatch(): ", str);
+               goto free;
        }
- out:
-       hist_field->field = field;
-       hist_field->flags = flags;
 
-       return hist_field;
-}
+       synth_event_name = strsep(&str, "(");
+       if (!synth_event_name || !str) {
+               hist_err("onmatch: Missing opening paramlist paren: ", synth_event_name);
+               goto free;
+       }
 
-static void destroy_hist_fields(struct hist_trigger_data *hist_data)
-{
-       unsigned int i;
+       data->onmatch.synth_event_name = kstrdup(synth_event_name, GFP_KERNEL);
+       if (!data->onmatch.synth_event_name) {
+               ret = -ENOMEM;
+               goto free;
+       }
 
-       for (i = 0; i < TRACING_MAP_FIELDS_MAX; i++) {
-               if (hist_data->fields[i]) {
-                       destroy_hist_field(hist_data->fields[i], 0);
-                       hist_data->fields[i] = NULL;
-               }
+       params = strsep(&str, ")");
+       if (!params || !str || (str && strlen(str))) {
+               hist_err("onmatch: Missing closing paramlist paren: ", params);
+               goto free;
        }
+
+       ret = parse_action_params(params, data);
+       if (ret)
+               goto free;
+ out:
+       return data;
+ free:
+       onmatch_destroy(data);
+       data = ERR_PTR(ret);
+       goto out;
 }
 
 static int create_hitcount_val(struct hist_trigger_data *hist_data)
 {
        hist_data->fields[HITCOUNT_IDX] =
-               create_hist_field(NULL, HIST_FIELD_FL_HITCOUNT);
+               create_hist_field(hist_data, NULL, HIST_FIELD_FL_HITCOUNT, NULL);
        if (!hist_data->fields[HITCOUNT_IDX])
                return -ENOMEM;
 
        hist_data->n_vals++;
+       hist_data->n_fields++;
 
        if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX))
                return -EINVAL;
@@ -466,54 +3826,71 @@ static int create_hitcount_val(struct hist_trigger_data *hist_data)
        return 0;
 }
 
+/*
+ * Parse field_str as an expression and install it at fields[val_idx],
+ * optionally bound to var_name.  Shared by value and variable field
+ * creation.  Returns 0 or a negative error.
+ */
+static int __create_val_field(struct hist_trigger_data *hist_data,
+                             unsigned int val_idx,
+                             struct trace_event_file *file,
+                             char *var_name, char *field_str,
+                             unsigned long flags)
+{
+       struct hist_field *hist_field;
+       int ret = 0;
+
+       hist_field = parse_expr(hist_data, file, field_str, flags, var_name, 0);
+       if (IS_ERR(hist_field)) {
+               ret = PTR_ERR(hist_field);
+               goto out;
+       }
+
+       hist_data->fields[val_idx] = hist_field;
+
+       ++hist_data->n_vals;
+       ++hist_data->n_fields;
+
+       /* vals and vars share the fields[] array, hence the summed cap */
+       if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX))
+               ret = -EINVAL;
+ out:
+       return ret;
+}
+
 static int create_val_field(struct hist_trigger_data *hist_data,
                            unsigned int val_idx,
                            struct trace_event_file *file,
                            char *field_str)
 {
-       struct ftrace_event_field *field = NULL;
-       unsigned long flags = 0;
-       char *field_name;
-       int ret = 0;
-
        if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX))
                return -EINVAL;
 
-       field_name = strsep(&field_str, ".");
-       if (field_str) {
-               if (strcmp(field_str, "hex") == 0)
-                       flags |= HIST_FIELD_FL_HEX;
-               else {
-                       ret = -EINVAL;
-                       goto out;
-               }
-       }
+       return __create_val_field(hist_data, val_idx, file, NULL, field_str, 0);
+}
 
-       field = trace_find_event_field(file->event_call, field_name);
-       if (!field || !field->size) {
-               ret = -EINVAL;
-               goto out;
-       }
+static int create_var_field(struct hist_trigger_data *hist_data,
+                           unsigned int val_idx,
+                           struct trace_event_file *file,
+                           char *var_name, char *expr_str)
+{
+       unsigned long flags = 0;
 
-       hist_data->fields[val_idx] = create_hist_field(field, flags);
-       if (!hist_data->fields[val_idx]) {
-               ret = -ENOMEM;
-               goto out;
+       if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX))
+               return -EINVAL;
+
+       if (find_var(hist_data, file, var_name) && !hist_data->remove) {
+               hist_err("Variable already defined: ", var_name);
+               return -EINVAL;
        }
 
-       ++hist_data->n_vals;
+       flags |= HIST_FIELD_FL_VAR;
+       hist_data->n_vars++;
+       if (WARN_ON(hist_data->n_vars > TRACING_MAP_VARS_MAX))
+               return -EINVAL;
 
-       if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX))
-               ret = -EINVAL;
- out:
-       return ret;
+       return __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags);
 }
 
 static int create_val_fields(struct hist_trigger_data *hist_data,
                             struct trace_event_file *file)
 {
        char *fields_str, *field_str;
-       unsigned int i, j;
+       unsigned int i, j = 1;
        int ret;
 
        ret = create_hitcount_val(hist_data);
@@ -533,12 +3910,15 @@ static int create_val_fields(struct hist_trigger_data *hist_data,
                field_str = strsep(&fields_str, ",");
                if (!field_str)
                        break;
+
                if (strcmp(field_str, "hitcount") == 0)
                        continue;
+
                ret = create_val_field(hist_data, j++, file, field_str);
                if (ret)
                        goto out;
        }
+
        if (fields_str && (strcmp(fields_str, "hitcount") != 0))
                ret = -EINVAL;
  out:
@@ -551,12 +3931,13 @@ static int create_key_field(struct hist_trigger_data *hist_data,
                            struct trace_event_file *file,
                            char *field_str)
 {
-       struct ftrace_event_field *field = NULL;
+       struct hist_field *hist_field = NULL;
+
        unsigned long flags = 0;
        unsigned int key_size;
        int ret = 0;
 
-       if (WARN_ON(key_idx >= TRACING_MAP_FIELDS_MAX))
+       if (WARN_ON(key_idx >= HIST_FIELDS_MAX))
                return -EINVAL;
 
        flags |= HIST_FIELD_FL_KEY;
@@ -564,57 +3945,40 @@ static int create_key_field(struct hist_trigger_data *hist_data,
        if (strcmp(field_str, "stacktrace") == 0) {
                flags |= HIST_FIELD_FL_STACKTRACE;
                key_size = sizeof(unsigned long) * HIST_STACKTRACE_DEPTH;
+               hist_field = create_hist_field(hist_data, NULL, flags, NULL);
        } else {
-               char *field_name = strsep(&field_str, ".");
-
-               if (field_str) {
-                       if (strcmp(field_str, "hex") == 0)
-                               flags |= HIST_FIELD_FL_HEX;
-                       else if (strcmp(field_str, "sym") == 0)
-                               flags |= HIST_FIELD_FL_SYM;
-                       else if (strcmp(field_str, "sym-offset") == 0)
-                               flags |= HIST_FIELD_FL_SYM_OFFSET;
-                       else if ((strcmp(field_str, "execname") == 0) &&
-                                (strcmp(field_name, "common_pid") == 0))
-                               flags |= HIST_FIELD_FL_EXECNAME;
-                       else if (strcmp(field_str, "syscall") == 0)
-                               flags |= HIST_FIELD_FL_SYSCALL;
-                       else if (strcmp(field_str, "log2") == 0)
-                               flags |= HIST_FIELD_FL_LOG2;
-                       else {
-                               ret = -EINVAL;
-                               goto out;
-                       }
+               hist_field = parse_expr(hist_data, file, field_str, flags,
+                                       NULL, 0);
+               if (IS_ERR(hist_field)) {
+                       ret = PTR_ERR(hist_field);
+                       goto out;
                }
 
-               field = trace_find_event_field(file->event_call, field_name);
-               if (!field || !field->size) {
+               if (hist_field->flags & HIST_FIELD_FL_VAR_REF) {
+                       hist_err("Using variable references as keys not supported: ", field_str);
+                       destroy_hist_field(hist_field, 0);
                        ret = -EINVAL;
                        goto out;
                }
 
-               if (is_string_field(field))
-                       key_size = MAX_FILTER_STR_VAL;
-               else
-                       key_size = field->size;
+               key_size = hist_field->size;
        }
 
-       hist_data->fields[key_idx] = create_hist_field(field, flags);
-       if (!hist_data->fields[key_idx]) {
-               ret = -ENOMEM;
-               goto out;
-       }
+       hist_data->fields[key_idx] = hist_field;
 
        key_size = ALIGN(key_size, sizeof(u64));
        hist_data->fields[key_idx]->size = key_size;
        hist_data->fields[key_idx]->offset = key_offset;
+
        hist_data->key_size += key_size;
+
        if (hist_data->key_size > HIST_KEY_SIZE_MAX) {
                ret = -EINVAL;
                goto out;
        }
 
        hist_data->n_keys++;
+       hist_data->n_fields++;
 
        if (WARN_ON(hist_data->n_keys > TRACING_MAP_KEYS_MAX))
                return -EINVAL;
@@ -658,21 +4022,113 @@ static int create_key_fields(struct hist_trigger_data *hist_data,
        return ret;
 }
 
+static int create_var_fields(struct hist_trigger_data *hist_data,
+                            struct trace_event_file *file)
+{
+       unsigned int i, j = hist_data->n_vals;
+       int ret = 0;
+
+       unsigned int n_vars = hist_data->attrs->var_defs.n_vars;
+
+       for (i = 0; i < n_vars; i++) {
+               char *var_name = hist_data->attrs->var_defs.name[i];
+               char *expr = hist_data->attrs->var_defs.expr[i];
+
+               ret = create_var_field(hist_data, j++, file, var_name, expr);
+               if (ret)
+                       goto out;
+       }
+ out:
+       return ret;
+}
+
+static void free_var_defs(struct hist_trigger_data *hist_data)
+{
+       unsigned int i;
+
+       for (i = 0; i < hist_data->attrs->var_defs.n_vars; i++) {
+               kfree(hist_data->attrs->var_defs.name[i]);
+               kfree(hist_data->attrs->var_defs.expr[i]);
+       }
+
+       hist_data->attrs->var_defs.n_vars = 0;
+}
+
+static int parse_var_defs(struct hist_trigger_data *hist_data)
+{
+       char *s, *str, *var_name, *field_str;
+       unsigned int i, j, n_vars = 0;
+       int ret = 0;
+
+       for (i = 0; i < hist_data->attrs->n_assignments; i++) {
+               str = hist_data->attrs->assignment_str[i];
+               for (j = 0; j < TRACING_MAP_VARS_MAX; j++) {
+                       field_str = strsep(&str, ",");
+                       if (!field_str)
+                               break;
+
+                       var_name = strsep(&field_str, "=");
+                       if (!var_name || !field_str) {
+                               hist_err("Malformed assignment: ", var_name);
+                               ret = -EINVAL;
+                               goto free;
+                       }
+
+                       if (n_vars == TRACING_MAP_VARS_MAX) {
+                               hist_err("Too many variables defined: ", var_name);
+                               ret = -EINVAL;
+                               goto free;
+                       }
+
+                       s = kstrdup(var_name, GFP_KERNEL);
+                       if (!s) {
+                               ret = -ENOMEM;
+                               goto free;
+                       }
+                       hist_data->attrs->var_defs.name[n_vars] = s;
+
+                       s = kstrdup(field_str, GFP_KERNEL);
+                       if (!s) {
+                               kfree(hist_data->attrs->var_defs.name[n_vars]);
+                               ret = -ENOMEM;
+                               goto free;
+                       }
+                       hist_data->attrs->var_defs.expr[n_vars++] = s;
+
+                       hist_data->attrs->var_defs.n_vars = n_vars;
+               }
+       }
+
+       return ret;
+ free:
+       free_var_defs(hist_data);
+
+       return ret;
+}
+
 static int create_hist_fields(struct hist_trigger_data *hist_data,
                              struct trace_event_file *file)
 {
        int ret;
 
+       ret = parse_var_defs(hist_data);
+       if (ret)
+               goto out;
+
        ret = create_val_fields(hist_data, file);
        if (ret)
                goto out;
 
-       ret = create_key_fields(hist_data, file);
+       ret = create_var_fields(hist_data, file);
        if (ret)
                goto out;
 
-       hist_data->n_fields = hist_data->n_vals + hist_data->n_keys;
+       ret = create_key_fields(hist_data, file);
+       if (ret)
+               goto out;
  out:
+       free_var_defs(hist_data);
+
        return ret;
 }
 
@@ -695,7 +4151,7 @@ static int create_sort_keys(struct hist_trigger_data *hist_data)
        char *fields_str = hist_data->attrs->sort_key_str;
        struct tracing_map_sort_key *sort_key;
        int descending, ret = 0;
-       unsigned int i, j;
+       unsigned int i, j, k;
 
        hist_data->n_sort_keys = 1; /* we always have at least one, hitcount */
 
@@ -743,12 +4199,19 @@ static int create_sort_keys(struct hist_trigger_data *hist_data)
                        continue;
                }
 
-               for (j = 1; j < hist_data->n_fields; j++) {
+               for (j = 1, k = 1; j < hist_data->n_fields; j++) {
+                       unsigned int idx;
+
                        hist_field = hist_data->fields[j];
+                       if (hist_field->flags & HIST_FIELD_FL_VAR)
+                               continue;
+
+                       idx = k++;
+
                        test_name = hist_field_name(hist_field, 0);
 
                        if (strcmp(field_name, test_name) == 0) {
-                               sort_key->field_idx = j;
+                               sort_key->field_idx = idx;
                                descending = is_descending(field_str);
                                if (descending < 0) {
                                        ret = descending;
@@ -763,16 +4226,230 @@ static int create_sort_keys(struct hist_trigger_data *hist_data)
                        break;
                }
        }
-       hist_data->n_sort_keys = i;
- out:
-       return ret;
+
+       hist_data->n_sort_keys = i;
+ out:
+       return ret;
+}
+
+static void destroy_actions(struct hist_trigger_data *hist_data)
+{
+       unsigned int i;
+
+       for (i = 0; i < hist_data->n_actions; i++) {
+               struct action_data *data = hist_data->actions[i];
+
+               if (data->fn == action_trace)
+                       onmatch_destroy(data);
+               else if (data->fn == onmax_save)
+                       onmax_destroy(data);
+               else
+                       kfree(data);
+       }
+}
+
+static int parse_actions(struct hist_trigger_data *hist_data)
+{
+       struct trace_array *tr = hist_data->event_file->tr;
+       struct action_data *data;
+       unsigned int i;
+       int ret = 0;
+       char *str;
+
+       for (i = 0; i < hist_data->attrs->n_actions; i++) {
+               str = hist_data->attrs->action_str[i];
+
+               if (strncmp(str, "onmatch(", strlen("onmatch(")) == 0) {
+                       char *action_str = str + strlen("onmatch(");
+
+                       data = onmatch_parse(tr, action_str);
+                       if (IS_ERR(data)) {
+                               ret = PTR_ERR(data);
+                               break;
+                       }
+                       data->fn = action_trace;
+               } else if (strncmp(str, "onmax(", strlen("onmax(")) == 0) {
+                       char *action_str = str + strlen("onmax(");
+
+                       data = onmax_parse(action_str);
+                       if (IS_ERR(data)) {
+                               ret = PTR_ERR(data);
+                               break;
+                       }
+                       data->fn = onmax_save;
+               } else {
+                       ret = -EINVAL;
+                       break;
+               }
+
+               hist_data->actions[hist_data->n_actions++] = data;
+       }
+
+       return ret;
+}
+
+static int create_actions(struct hist_trigger_data *hist_data,
+                         struct trace_event_file *file)
+{
+       struct action_data *data;
+       unsigned int i;
+       int ret = 0;
+
+       for (i = 0; i < hist_data->attrs->n_actions; i++) {
+               data = hist_data->actions[i];
+
+               if (data->fn == action_trace) {
+                       ret = onmatch_create(hist_data, file, data);
+                       if (ret)
+                               return ret;
+               } else if (data->fn == onmax_save) {
+                       ret = onmax_create(hist_data, data);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       return ret;
+}
+
+static void print_actions(struct seq_file *m,
+                         struct hist_trigger_data *hist_data,
+                         struct tracing_map_elt *elt)
+{
+       unsigned int i;
+
+       for (i = 0; i < hist_data->n_actions; i++) {
+               struct action_data *data = hist_data->actions[i];
+
+               if (data->fn == onmax_save)
+                       onmax_print(m, hist_data, elt, data);
+       }
+}
+
+static void print_onmax_spec(struct seq_file *m,
+                            struct hist_trigger_data *hist_data,
+                            struct action_data *data)
+{
+       unsigned int i;
+
+       seq_puts(m, ":onmax(");
+       seq_printf(m, "%s", data->onmax.var_str);
+       seq_printf(m, ").%s(", data->onmax.fn_name);
+
+       for (i = 0; i < hist_data->n_max_vars; i++) {
+               seq_printf(m, "%s", hist_data->max_vars[i]->var->var.name);
+               if (i < hist_data->n_max_vars - 1)
+                       seq_puts(m, ",");
+       }
+       seq_puts(m, ")");
+}
+
+static void print_onmatch_spec(struct seq_file *m,
+                              struct hist_trigger_data *hist_data,
+                              struct action_data *data)
+{
+       unsigned int i;
+
+       seq_printf(m, ":onmatch(%s.%s).", data->onmatch.match_event_system,
+                  data->onmatch.match_event);
+
+       seq_printf(m, "%s(", data->onmatch.synth_event->name);
+
+       for (i = 0; i < data->n_params; i++) {
+               if (i)
+                       seq_puts(m, ",");
+               seq_printf(m, "%s", data->params[i]);
+       }
+
+       seq_puts(m, ")");
+}
+
+static bool actions_match(struct hist_trigger_data *hist_data,
+                         struct hist_trigger_data *hist_data_test)
+{
+       unsigned int i, j;
+
+       if (hist_data->n_actions != hist_data_test->n_actions)
+               return false;
+
+       for (i = 0; i < hist_data->n_actions; i++) {
+               struct action_data *data = hist_data->actions[i];
+               struct action_data *data_test = hist_data_test->actions[i];
+
+               if (data->fn != data_test->fn)
+                       return false;
+
+               if (data->n_params != data_test->n_params)
+                       return false;
+
+               for (j = 0; j < data->n_params; j++) {
+                       if (strcmp(data->params[j], data_test->params[j]) != 0)
+                               return false;
+               }
+
+               if (data->fn == action_trace) {
+                       if (strcmp(data->onmatch.synth_event_name,
+                                  data_test->onmatch.synth_event_name) != 0)
+                               return false;
+                       if (strcmp(data->onmatch.match_event_system,
+                                  data_test->onmatch.match_event_system) != 0)
+                               return false;
+                       if (strcmp(data->onmatch.match_event,
+                                  data_test->onmatch.match_event) != 0)
+                               return false;
+               } else if (data->fn == onmax_save) {
+                       if (strcmp(data->onmax.var_str,
+                                  data_test->onmax.var_str) != 0)
+                               return false;
+                       if (strcmp(data->onmax.fn_name,
+                                  data_test->onmax.fn_name) != 0)
+                               return false;
+               }
+       }
+
+       return true;
+}
+
+
+static void print_actions_spec(struct seq_file *m,
+                              struct hist_trigger_data *hist_data)
+{
+       unsigned int i;
+
+       for (i = 0; i < hist_data->n_actions; i++) {
+               struct action_data *data = hist_data->actions[i];
+
+               if (data->fn == action_trace)
+                       print_onmatch_spec(m, hist_data, data);
+               else if (data->fn == onmax_save)
+                       print_onmax_spec(m, hist_data, data);
+       }
+}
+
+static void destroy_field_var_hists(struct hist_trigger_data *hist_data)
+{
+       unsigned int i;
+
+       for (i = 0; i < hist_data->n_field_var_hists; i++) {
+               kfree(hist_data->field_var_hists[i]->cmd);
+               kfree(hist_data->field_var_hists[i]);
+       }
 }
 
 static void destroy_hist_data(struct hist_trigger_data *hist_data)
 {
+       if (!hist_data)
+               return;
+
        destroy_hist_trigger_attrs(hist_data->attrs);
        destroy_hist_fields(hist_data);
        tracing_map_destroy(hist_data->map);
+
+       destroy_actions(hist_data);
+       destroy_field_vars(hist_data);
+       destroy_field_var_hists(hist_data);
+       destroy_synth_var_refs(hist_data);
+
        kfree(hist_data);
 }
 
@@ -781,7 +4458,7 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data)
        struct tracing_map *map = hist_data->map;
        struct ftrace_event_field *field;
        struct hist_field *hist_field;
-       int i, idx;
+       int i, idx = 0;
 
        for_each_hist_field(i, hist_data) {
                hist_field = hist_data->fields[i];
@@ -792,6 +4469,9 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data)
 
                        if (hist_field->flags & HIST_FIELD_FL_STACKTRACE)
                                cmp_fn = tracing_map_cmp_none;
+                       else if (!field)
+                               cmp_fn = tracing_map_cmp_num(hist_field->size,
+                                                            hist_field->is_signed);
                        else if (is_string_field(field))
                                cmp_fn = tracing_map_cmp_string;
                        else
@@ -800,36 +4480,29 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data)
                        idx = tracing_map_add_key_field(map,
                                                        hist_field->offset,
                                                        cmp_fn);
-
-               } else
+               } else if (!(hist_field->flags & HIST_FIELD_FL_VAR))
                        idx = tracing_map_add_sum_field(map);
 
                if (idx < 0)
                        return idx;
-       }
-
-       return 0;
-}
-
-static bool need_tracing_map_ops(struct hist_trigger_data *hist_data)
-{
-       struct hist_field *key_field;
-       unsigned int i;
-
-       for_each_hist_key_field(i, hist_data) {
-               key_field = hist_data->fields[i];
 
-               if (key_field->flags & HIST_FIELD_FL_EXECNAME)
-                       return true;
+               if (hist_field->flags & HIST_FIELD_FL_VAR) {
+                       idx = tracing_map_add_var(map);
+                       if (idx < 0)
+                               return idx;
+                       hist_field->var.idx = idx;
+                       hist_field->var.hist_data = hist_data;
+               }
        }
 
-       return false;
+       return 0;
 }
 
 static struct hist_trigger_data *
 create_hist_data(unsigned int map_bits,
                 struct hist_trigger_attrs *attrs,
-                struct trace_event_file *file)
+                struct trace_event_file *file,
+                bool remove)
 {
        const struct tracing_map_ops *map_ops = NULL;
        struct hist_trigger_data *hist_data;
@@ -840,6 +4513,12 @@ create_hist_data(unsigned int map_bits,
                return ERR_PTR(-ENOMEM);
 
        hist_data->attrs = attrs;
+       hist_data->remove = remove;
+       hist_data->event_file = file;
+
+       ret = parse_actions(hist_data);
+       if (ret)
+               goto free;
 
        ret = create_hist_fields(hist_data, file);
        if (ret)
@@ -849,8 +4528,7 @@ create_hist_data(unsigned int map_bits,
        if (ret)
                goto free;
 
-       if (need_tracing_map_ops(hist_data))
-               map_ops = &hist_trigger_elt_comm_ops;
+       map_ops = &hist_trigger_elt_data_ops;
 
        hist_data->map = tracing_map_create(map_bits, hist_data->key_size,
                                            map_ops, hist_data);
@@ -863,12 +4541,6 @@ create_hist_data(unsigned int map_bits,
        ret = create_tracing_map_fields(hist_data);
        if (ret)
                goto free;
-
-       ret = tracing_map_init(hist_data->map);
-       if (ret)
-               goto free;
-
-       hist_data->event_file = file;
  out:
        return hist_data;
  free:
@@ -882,18 +4554,39 @@ create_hist_data(unsigned int map_bits,
 }
 
 static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
-                                   struct tracing_map_elt *elt,
-                                   void *rec)
+                                   struct tracing_map_elt *elt, void *rec,
+                                   struct ring_buffer_event *rbe,
+                                   u64 *var_ref_vals)
 {
+       struct hist_elt_data *elt_data;
        struct hist_field *hist_field;
-       unsigned int i;
+       unsigned int i, var_idx;
        u64 hist_val;
 
+       elt_data = elt->private_data;
+       elt_data->var_ref_vals = var_ref_vals;
+
        for_each_hist_val_field(i, hist_data) {
                hist_field = hist_data->fields[i];
-               hist_val = hist_field->fn(hist_field, rec);
+               hist_val = hist_field->fn(hist_field, elt, rbe, rec);
+               if (hist_field->flags & HIST_FIELD_FL_VAR) {
+                       var_idx = hist_field->var.idx;
+                       tracing_map_set_var(elt, var_idx, hist_val);
+                       continue;
+               }
                tracing_map_update_sum(elt, i, hist_val);
        }
+
+       for_each_hist_key_field(i, hist_data) {
+               hist_field = hist_data->fields[i];
+               if (hist_field->flags & HIST_FIELD_FL_VAR) {
+                       hist_val = hist_field->fn(hist_field, elt, rbe, rec);
+                       var_idx = hist_field->var.idx;
+                       tracing_map_set_var(elt, var_idx, hist_val);
+               }
+       }
+
+       update_field_vars(hist_data, elt, rbe, rec);
 }
 
 static inline void add_to_key(char *compound_key, void *key,
@@ -920,15 +4613,31 @@ static inline void add_to_key(char *compound_key, void *key,
        memcpy(compound_key + key_field->offset, key, size);
 }
 
-static void event_hist_trigger(struct event_trigger_data *data, void *rec)
+static void
+hist_trigger_actions(struct hist_trigger_data *hist_data,
+                    struct tracing_map_elt *elt, void *rec,
+                    struct ring_buffer_event *rbe, u64 *var_ref_vals)
+{
+       struct action_data *data;
+       unsigned int i;
+
+       for (i = 0; i < hist_data->n_actions; i++) {
+               data = hist_data->actions[i];
+               data->fn(hist_data, elt, rec, rbe, data, var_ref_vals);
+       }
+}
+
+static void event_hist_trigger(struct event_trigger_data *data, void *rec,
+                              struct ring_buffer_event *rbe)
 {
        struct hist_trigger_data *hist_data = data->private_data;
        bool use_compound_key = (hist_data->n_keys > 1);
        unsigned long entries[HIST_STACKTRACE_DEPTH];
+       u64 var_ref_vals[TRACING_MAP_VARS_MAX];
        char compound_key[HIST_KEY_SIZE_MAX];
+       struct tracing_map_elt *elt = NULL;
        struct stack_trace stacktrace;
        struct hist_field *key_field;
-       struct tracing_map_elt *elt;
        u64 field_contents;
        void *key = NULL;
        unsigned int i;
@@ -949,7 +4658,7 @@ static void event_hist_trigger(struct event_trigger_data *data, void *rec)
 
                        key = entries;
                } else {
-                       field_contents = key_field->fn(key_field, rec);
+                       field_contents = key_field->fn(key_field, elt, rbe, rec);
                        if (key_field->flags & HIST_FIELD_FL_STRING) {
                                key = (void *)(unsigned long)field_contents;
                                use_compound_key = true;
@@ -964,9 +4673,18 @@ static void event_hist_trigger(struct event_trigger_data *data, void *rec)
        if (use_compound_key)
                key = compound_key;
 
+       if (hist_data->n_var_refs &&
+           !resolve_var_refs(hist_data, key, var_ref_vals, false))
+               return;
+
        elt = tracing_map_insert(hist_data->map, key);
-       if (elt)
-               hist_trigger_elt_update(hist_data, elt, rec);
+       if (!elt)
+               return;
+
+       hist_trigger_elt_update(hist_data, elt, rec, rbe, var_ref_vals);
+
+       if (resolve_var_refs(hist_data, key, var_ref_vals, true))
+               hist_trigger_actions(hist_data, elt, rec, rbe, var_ref_vals);
 }
 
 static void hist_trigger_stacktrace_print(struct seq_file *m,
@@ -1023,7 +4741,13 @@ hist_trigger_entry_print(struct seq_file *m,
                        seq_printf(m, "%s: [%llx] %-55s", field_name,
                                   uval, str);
                } else if (key_field->flags & HIST_FIELD_FL_EXECNAME) {
-                       char *comm = elt->private_data;
+                       struct hist_elt_data *elt_data = elt->private_data;
+                       char *comm;
+
+                       if (WARN_ON_ONCE(!elt_data))
+                               return;
+
+                       comm = elt_data->comm;
 
                        uval = *(u64 *)(key + key_field->offset);
                        seq_printf(m, "%s: %-16s[%10llu]", field_name,
@@ -1067,6 +4791,10 @@ hist_trigger_entry_print(struct seq_file *m,
        for (i = 1; i < hist_data->n_vals; i++) {
                field_name = hist_field_name(hist_data->fields[i], 0);
 
+               if (hist_data->fields[i]->flags & HIST_FIELD_FL_VAR ||
+                   hist_data->fields[i]->flags & HIST_FIELD_FL_EXPR)
+                       continue;
+
                if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) {
                        seq_printf(m, "  %s: %10llx", field_name,
                                   tracing_map_read_sum(elt, i));
@@ -1076,6 +4804,8 @@ hist_trigger_entry_print(struct seq_file *m,
                }
        }
 
+       print_actions(m, hist_data, elt);
+
        seq_puts(m, "\n");
 }
 
@@ -1144,6 +4874,11 @@ static int hist_show(struct seq_file *m, void *v)
                        hist_trigger_show(m, data, n++);
        }
 
+       if (have_hist_err()) {
+               seq_printf(m, "\nERROR: %s\n", hist_err_str);
+               seq_printf(m, "  Last command: %s\n", last_hist_cmd);
+       }
+
  out_unlock:
        mutex_unlock(&event_mutex);
 
@@ -1162,37 +4897,22 @@ const struct file_operations event_hist_fops = {
        .release = single_release,
 };
 
-static const char *get_hist_field_flags(struct hist_field *hist_field)
-{
-       const char *flags_str = NULL;
-
-       if (hist_field->flags & HIST_FIELD_FL_HEX)
-               flags_str = "hex";
-       else if (hist_field->flags & HIST_FIELD_FL_SYM)
-               flags_str = "sym";
-       else if (hist_field->flags & HIST_FIELD_FL_SYM_OFFSET)
-               flags_str = "sym-offset";
-       else if (hist_field->flags & HIST_FIELD_FL_EXECNAME)
-               flags_str = "execname";
-       else if (hist_field->flags & HIST_FIELD_FL_SYSCALL)
-               flags_str = "syscall";
-       else if (hist_field->flags & HIST_FIELD_FL_LOG2)
-               flags_str = "log2";
-
-       return flags_str;
-}
-
 static void hist_field_print(struct seq_file *m, struct hist_field *hist_field)
 {
        const char *field_name = hist_field_name(hist_field, 0);
 
-       seq_printf(m, "%s", field_name);
-       if (hist_field->flags) {
-               const char *flags_str = get_hist_field_flags(hist_field);
-
-               if (flags_str)
-                       seq_printf(m, ".%s", flags_str);
-       }
+       if (hist_field->var.name)
+               seq_printf(m, "%s=", hist_field->var.name);
+
+       if (hist_field->flags & HIST_FIELD_FL_CPU)
+               seq_puts(m, "cpu");
+       else if (field_name) {
+               if (hist_field->flags & HIST_FIELD_FL_VAR_REF ||
+                   hist_field->flags & HIST_FIELD_FL_ALIAS)
+                       seq_putc(m, '$');
+               seq_printf(m, "%s", field_name);
+       } else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP)
+               seq_puts(m, "common_timestamp");
 }
 
 static int event_hist_trigger_print(struct seq_file *m,
@@ -1200,7 +4920,8 @@ static int event_hist_trigger_print(struct seq_file *m,
                                    struct event_trigger_data *data)
 {
        struct hist_trigger_data *hist_data = data->private_data;
-       struct hist_field *key_field;
+       struct hist_field *field;
+       bool have_var = false;
        unsigned int i;
 
        seq_puts(m, "hist:");
@@ -1211,25 +4932,47 @@ static int event_hist_trigger_print(struct seq_file *m,
        seq_puts(m, "keys=");
 
        for_each_hist_key_field(i, hist_data) {
-               key_field = hist_data->fields[i];
+               field = hist_data->fields[i];
 
                if (i > hist_data->n_vals)
                        seq_puts(m, ",");
 
-               if (key_field->flags & HIST_FIELD_FL_STACKTRACE)
+               if (field->flags & HIST_FIELD_FL_STACKTRACE)
                        seq_puts(m, "stacktrace");
                else
-                       hist_field_print(m, key_field);
+                       hist_field_print(m, field);
        }
 
        seq_puts(m, ":vals=");
 
        for_each_hist_val_field(i, hist_data) {
+               field = hist_data->fields[i];
+               if (field->flags & HIST_FIELD_FL_VAR) {
+                       have_var = true;
+                       continue;
+               }
+
                if (i == HITCOUNT_IDX)
                        seq_puts(m, "hitcount");
                else {
                        seq_puts(m, ",");
-                       hist_field_print(m, hist_data->fields[i]);
+                       hist_field_print(m, field);
+               }
+       }
+
+       if (have_var) {
+               unsigned int n = 0;
+
+               seq_puts(m, ":");
+
+               for_each_hist_val_field(i, hist_data) {
+                       field = hist_data->fields[i];
+
+                       if (field->flags & HIST_FIELD_FL_VAR) {
+                               if (n++)
+                                       seq_puts(m, ",");
+                               hist_field_print(m, field);
+                       }
                }
        }
 
@@ -1237,28 +4980,36 @@ static int event_hist_trigger_print(struct seq_file *m,
 
        for (i = 0; i < hist_data->n_sort_keys; i++) {
                struct tracing_map_sort_key *sort_key;
+               unsigned int idx, first_key_idx;
+
+               /* skip VAR vals */
+               first_key_idx = hist_data->n_vals - hist_data->n_vars;
 
                sort_key = &hist_data->sort_keys[i];
+               idx = sort_key->field_idx;
+
+               if (WARN_ON(idx >= HIST_FIELDS_MAX))
+                       return -EINVAL;
 
                if (i > 0)
                        seq_puts(m, ",");
 
-               if (sort_key->field_idx == HITCOUNT_IDX)
+               if (idx == HITCOUNT_IDX)
                        seq_puts(m, "hitcount");
                else {
-                       unsigned int idx = sort_key->field_idx;
-
-                       if (WARN_ON(idx >= TRACING_MAP_FIELDS_MAX))
-                               return -EINVAL;
-
+                       if (idx >= first_key_idx)
+                               idx += hist_data->n_vars;
                        hist_field_print(m, hist_data->fields[idx]);
                }
 
                if (sort_key->descending)
                        seq_puts(m, ".descending");
        }
-
        seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits));
+       if (hist_data->enable_timestamps)
+               seq_printf(m, ":clock=%s", hist_data->attrs->clock);
+
+       print_actions_spec(m, hist_data);
 
        if (data->filter_str)
                seq_printf(m, " if %s", data->filter_str);
@@ -1286,6 +5037,21 @@ static int event_hist_trigger_init(struct event_trigger_ops *ops,
        return 0;
 }
 
+static void unregister_field_var_hists(struct hist_trigger_data *hist_data)
+{
+       struct trace_event_file *file;
+       unsigned int i;
+       char *cmd;
+       int ret;
+
+       for (i = 0; i < hist_data->n_field_var_hists; i++) {
+               file = hist_data->field_var_hists[i]->hist_data->event_file;
+               cmd = hist_data->field_var_hists[i]->cmd;
+               ret = event_hist_trigger_func(&trigger_hist_cmd, file,
+                                             "!hist", "hist", cmd);
+       }
+}
+
 static void event_hist_trigger_free(struct event_trigger_ops *ops,
                                    struct event_trigger_data *data)
 {
@@ -1298,7 +5064,13 @@ static void event_hist_trigger_free(struct event_trigger_ops *ops,
        if (!data->ref) {
                if (data->name)
                        del_named_trigger(data);
+
                trigger_data_free(data);
+
+               remove_hist_vars(hist_data);
+
+               unregister_field_var_hists(hist_data);
+
                destroy_hist_data(hist_data);
        }
 }
@@ -1425,6 +5197,15 @@ static bool hist_trigger_match(struct event_trigger_data *data,
                        return false;
                if (key_field->offset != key_field_test->offset)
                        return false;
+               if (key_field->size != key_field_test->size)
+                       return false;
+               if (key_field->is_signed != key_field_test->is_signed)
+                       return false;
+               if (!!key_field->var.name != !!key_field_test->var.name)
+                       return false;
+               if (key_field->var.name &&
+                   strcmp(key_field->var.name, key_field_test->var.name) != 0)
+                       return false;
        }
 
        for (i = 0; i < hist_data->n_sort_keys; i++) {
@@ -1440,6 +5221,9 @@ static bool hist_trigger_match(struct event_trigger_data *data,
            (strcmp(data->filter_str, data_test->filter_str) != 0))
                return false;
 
+       if (!actions_match(hist_data, hist_data_test))
+               return false;
+
        return true;
 }
 
@@ -1456,6 +5240,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops,
                if (named_data) {
                        if (!hist_trigger_match(data, named_data, named_data,
                                                true)) {
+                               hist_err("Named hist trigger doesn't match existing named trigger (includes variables): ", hist_data->attrs->name);
                                ret = -EINVAL;
                                goto out;
                        }
@@ -1475,13 +5260,16 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops,
                                test->paused = false;
                        else if (hist_data->attrs->clear)
                                hist_clear(test);
-                       else
+                       else {
+                               hist_err("Hist trigger already exists", NULL);
                                ret = -EEXIST;
+                       }
                        goto out;
                }
        }
  new:
        if (hist_data->attrs->cont || hist_data->attrs->clear) {
+               hist_err("Can't clear or continue a nonexistent hist trigger", NULL);
                ret = -ENOENT;
                goto out;
        }
@@ -1490,7 +5278,6 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops,
                data->paused = true;
 
        if (named_data) {
-               destroy_hist_data(data->private_data);
                data->private_data = named_data->private_data;
                set_named_trigger_data(data, named_data);
                data->ops = &event_hist_trigger_named_ops;
@@ -1502,8 +5289,32 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops,
                        goto out;
        }
 
-       list_add_rcu(&data->list, &file->triggers);
+       if (hist_data->enable_timestamps) {
+               char *clock = hist_data->attrs->clock;
+
+               ret = tracing_set_clock(file->tr, hist_data->attrs->clock);
+               if (ret) {
+                       hist_err("Couldn't set trace_clock: ", clock);
+                       goto out;
+               }
+
+               tracing_set_time_stamp_abs(file->tr, true);
+       }
+
+       if (named_data)
+               destroy_hist_data(hist_data);
+
        ret++;
+ out:
+       return ret;
+}
+
+static int hist_trigger_enable(struct event_trigger_data *data,
+                              struct trace_event_file *file)
+{
+       int ret = 0;
+
+       list_add_tail_rcu(&data->list, &file->triggers);
 
        update_cond_flag(file);
 
@@ -1512,10 +5323,55 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops,
                update_cond_flag(file);
                ret--;
        }
- out:
+
        return ret;
 }
 
+static bool have_hist_trigger_match(struct event_trigger_data *data,
+                                   struct trace_event_file *file)
+{
+       struct hist_trigger_data *hist_data = data->private_data;
+       struct event_trigger_data *test, *named_data = NULL;
+       bool match = false;
+
+       if (hist_data->attrs->name)
+               named_data = find_named_trigger(hist_data->attrs->name);
+
+       list_for_each_entry_rcu(test, &file->triggers, list) {
+               if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+                       if (hist_trigger_match(data, test, named_data, false)) {
+                               match = true;
+                               break;
+                       }
+               }
+       }
+
+       return match;
+}
+
+static bool hist_trigger_check_refs(struct event_trigger_data *data,
+                                   struct trace_event_file *file)
+{
+       struct hist_trigger_data *hist_data = data->private_data;
+       struct event_trigger_data *test, *named_data = NULL;
+
+       if (hist_data->attrs->name)
+               named_data = find_named_trigger(hist_data->attrs->name);
+
+       list_for_each_entry_rcu(test, &file->triggers, list) {
+               if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+                       if (!hist_trigger_match(data, test, named_data, false))
+                               continue;
+                       hist_data = test->private_data;
+                       if (check_var_refs(hist_data))
+                               return true;
+                       break;
+               }
+       }
+
+       return false;
+}
+
 static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops,
                                    struct event_trigger_data *data,
                                    struct trace_event_file *file)
@@ -1541,17 +5397,55 @@ static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops,
 
        if (unregistered && test->ops->free)
                test->ops->free(test->ops, test);
+
+       if (hist_data->enable_timestamps) {
+               if (!hist_data->remove || unregistered)
+                       tracing_set_time_stamp_abs(file->tr, false);
+       }
+}
+
+static bool hist_file_check_refs(struct trace_event_file *file)
+{
+       struct hist_trigger_data *hist_data;
+       struct event_trigger_data *test;
+
+       list_for_each_entry_rcu(test, &file->triggers, list) {
+               if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+                       hist_data = test->private_data;
+                       if (check_var_refs(hist_data))
+                               return true;
+               }
+       }
+
+       return false;
 }
 
 static void hist_unreg_all(struct trace_event_file *file)
 {
        struct event_trigger_data *test, *n;
+       struct hist_trigger_data *hist_data;
+       struct synth_event *se;
+       const char *se_name;
+
+       if (hist_file_check_refs(file))
+               return;
 
        list_for_each_entry_safe(test, n, &file->triggers, list) {
                if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+                       hist_data = test->private_data;
                        list_del_rcu(&test->list);
                        trace_event_trigger_enable_disable(file, 0);
+
+                       mutex_lock(&synth_event_mutex);
+                       se_name = trace_event_name(file->event_call);
+                       se = find_synth_event(se_name);
+                       if (se)
+                               se->ref--;
+                       mutex_unlock(&synth_event_mutex);
+
                        update_cond_flag(file);
+                       if (hist_data->enable_timestamps)
+                               tracing_set_time_stamp_abs(file->tr, false);
                        if (test->ops->free)
                                test->ops->free(test->ops, test);
                }
@@ -1567,16 +5461,54 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
        struct hist_trigger_attrs *attrs;
        struct event_trigger_ops *trigger_ops;
        struct hist_trigger_data *hist_data;
-       char *trigger;
+       struct synth_event *se;
+       const char *se_name;
+       bool remove = false;
+       char *trigger, *p;
        int ret = 0;
 
+       if (glob && strlen(glob)) {
+               last_cmd_set(param);
+               hist_err_clear();
+       }
+
        if (!param)
                return -EINVAL;
 
-       /* separate the trigger from the filter (k:v [if filter]) */
-       trigger = strsep(&param, " \t");
-       if (!trigger)
-               return -EINVAL;
+       if (glob[0] == '!')
+               remove = true;
+
+       /*
+        * separate the trigger from the filter (k:v [if filter])
+        * allowing for whitespace in the trigger
+        */
+       p = trigger = param;
+       do {
+               p = strstr(p, "if");
+               if (!p)
+                       break;
+               if (p == param)
+                       return -EINVAL;
+               if (*(p - 1) != ' ' && *(p - 1) != '\t') {
+                       p++;
+                       continue;
+               }
+               if (p >= param + strlen(param) - strlen("if") - 1)
+                       return -EINVAL;
+               if (*(p + strlen("if")) != ' ' && *(p + strlen("if")) != '\t') {
+                       p++;
+                       continue;
+               }
+               break;
+       } while (p);
+
+       if (!p)
+               param = NULL;
+       else {
+               *(p - 1) = '\0';
+               param = strstrip(p);
+               trigger = strstrip(trigger);
+       }
 
        attrs = parse_hist_trigger_attrs(trigger);
        if (IS_ERR(attrs))
@@ -1585,7 +5517,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
        if (attrs->map_bits)
                hist_trigger_bits = attrs->map_bits;
 
-       hist_data = create_hist_data(hist_trigger_bits, attrs, file);
+       hist_data = create_hist_data(hist_trigger_bits, attrs, file, remove);
        if (IS_ERR(hist_data)) {
                destroy_hist_trigger_attrs(attrs);
                return PTR_ERR(hist_data);
@@ -1593,10 +5525,11 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
 
        trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger);
 
-       ret = -ENOMEM;
        trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL);
-       if (!trigger_data)
+       if (!trigger_data) {
+               ret = -ENOMEM;
                goto out_free;
+       }
 
        trigger_data->count = -1;
        trigger_data->ops = trigger_ops;
@@ -1614,8 +5547,24 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
                        goto out_free;
        }
 
-       if (glob[0] == '!') {
+       if (remove) {
+               if (!have_hist_trigger_match(trigger_data, file))
+                       goto out_free;
+
+               if (hist_trigger_check_refs(trigger_data, file)) {
+                       ret = -EBUSY;
+                       goto out_free;
+               }
+
                cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
+
+               mutex_lock(&synth_event_mutex);
+               se_name = trace_event_name(file->event_call);
+               se = find_synth_event(se_name);
+               if (se)
+                       se->ref--;
+               mutex_unlock(&synth_event_mutex);
+
                ret = 0;
                goto out_free;
        }
@@ -1632,14 +5581,47 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
                goto out_free;
        } else if (ret < 0)
                goto out_free;
+
+       if (get_named_trigger_data(trigger_data))
+               goto enable;
+
+       if (has_hist_vars(hist_data))
+               save_hist_vars(hist_data);
+
+       ret = create_actions(hist_data, file);
+       if (ret)
+               goto out_unreg;
+
+       ret = tracing_map_init(hist_data->map);
+       if (ret)
+               goto out_unreg;
+enable:
+       ret = hist_trigger_enable(trigger_data, file);
+       if (ret)
+               goto out_unreg;
+
+       mutex_lock(&synth_event_mutex);
+       se_name = trace_event_name(file->event_call);
+       se = find_synth_event(se_name);
+       if (se)
+               se->ref++;
+       mutex_unlock(&synth_event_mutex);
+
        /* Just return zero, not the number of registered triggers */
        ret = 0;
  out:
+       if (ret == 0)
+               hist_err_clear();
+
        return ret;
+ out_unreg:
+       cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
  out_free:
        if (cmd_ops->set_filter)
                cmd_ops->set_filter(NULL, trigger_data, NULL);
 
+       remove_hist_vars(hist_data);
+
        kfree(trigger_data);
 
        destroy_hist_data(hist_data);
@@ -1669,7 +5651,8 @@ __init int register_trigger_hist_cmd(void)
 }
 
 static void
-hist_enable_trigger(struct event_trigger_data *data, void *rec)
+hist_enable_trigger(struct event_trigger_data *data, void *rec,
+                   struct ring_buffer_event *event)
 {
        struct enable_trigger_data *enable_data = data->private_data;
        struct event_trigger_data *test;
@@ -1685,7 +5668,8 @@ hist_enable_trigger(struct event_trigger_data *data, void *rec)
 }
 
 static void
-hist_enable_count_trigger(struct event_trigger_data *data, void *rec)
+hist_enable_count_trigger(struct event_trigger_data *data, void *rec,
+                         struct ring_buffer_event *event)
 {
        if (!data->count)
                return;
@@ -1693,7 +5677,7 @@ hist_enable_count_trigger(struct event_trigger_data *data, void *rec)
        if (data->count != -1)
                (data->count)--;
 
-       hist_enable_trigger(data, rec);
+       hist_enable_trigger(data, rec, event);
 }
 
 static struct event_trigger_ops hist_enable_trigger_ops = {
@@ -1798,3 +5782,31 @@ __init int register_trigger_hist_enable_disable_cmds(void)
 
        return ret;
 }
+
+static __init int trace_events_hist_init(void)
+{
+       struct dentry *entry = NULL;
+       struct dentry *d_tracer;
+       int err = 0;
+
+       d_tracer = tracing_init_dentry();
+       if (IS_ERR(d_tracer)) {
+               err = PTR_ERR(d_tracer);
+               goto err;
+       }
+
+       entry = tracefs_create_file("synthetic_events", 0644, d_tracer,
+                                   NULL, &synth_events_fops);
+       if (!entry) {
+               err = -ENODEV;
+               goto err;
+       }
+
+       return err;
+ err:
+       pr_warn("Could not create tracefs 'synthetic_events' entry\n");
+
+       return err;
+}
+
+fs_initcall(trace_events_hist_init);
index 87411482a46f2753685c9eab3d9e91830d7ae47d..d251cabcf69a093d149f285f7e1d9ea0f5f9a876 100644 (file)
@@ -63,7 +63,8 @@ void trigger_data_free(struct event_trigger_data *data)
  * any trigger that should be deferred, ETT_NONE if nothing to defer.
  */
 enum event_trigger_type
-event_triggers_call(struct trace_event_file *file, void *rec)
+event_triggers_call(struct trace_event_file *file, void *rec,
+                   struct ring_buffer_event *event)
 {
        struct event_trigger_data *data;
        enum event_trigger_type tt = ETT_NONE;
@@ -76,7 +77,7 @@ event_triggers_call(struct trace_event_file *file, void *rec)
                if (data->paused)
                        continue;
                if (!rec) {
-                       data->ops->func(data, rec);
+                       data->ops->func(data, rec, event);
                        continue;
                }
                filter = rcu_dereference_sched(data->filter);
@@ -86,7 +87,7 @@ event_triggers_call(struct trace_event_file *file, void *rec)
                        tt |= data->cmd_ops->trigger_type;
                        continue;
                }
-               data->ops->func(data, rec);
+               data->ops->func(data, rec, event);
        }
        return tt;
 }
@@ -108,7 +109,7 @@ EXPORT_SYMBOL_GPL(event_triggers_call);
 void
 event_triggers_post_call(struct trace_event_file *file,
                         enum event_trigger_type tt,
-                        void *rec)
+                        void *rec, struct ring_buffer_event *event)
 {
        struct event_trigger_data *data;
 
@@ -116,7 +117,7 @@ event_triggers_post_call(struct trace_event_file *file,
                if (data->paused)
                        continue;
                if (data->cmd_ops->trigger_type & tt)
-                       data->ops->func(data, rec);
+                       data->ops->func(data, rec, event);
        }
 }
 EXPORT_SYMBOL_GPL(event_triggers_post_call);
@@ -908,8 +909,15 @@ void set_named_trigger_data(struct event_trigger_data *data,
        data->named_data = named_data;
 }
 
+struct event_trigger_data *
+get_named_trigger_data(struct event_trigger_data *data)
+{
+       return data->named_data;
+}
+
 static void
-traceon_trigger(struct event_trigger_data *data, void *rec)
+traceon_trigger(struct event_trigger_data *data, void *rec,
+               struct ring_buffer_event *event)
 {
        if (tracing_is_on())
                return;
@@ -918,7 +926,8 @@ traceon_trigger(struct event_trigger_data *data, void *rec)
 }
 
 static void
-traceon_count_trigger(struct event_trigger_data *data, void *rec)
+traceon_count_trigger(struct event_trigger_data *data, void *rec,
+                     struct ring_buffer_event *event)
 {
        if (tracing_is_on())
                return;
@@ -933,7 +942,8 @@ traceon_count_trigger(struct event_trigger_data *data, void *rec)
 }
 
 static void
-traceoff_trigger(struct event_trigger_data *data, void *rec)
+traceoff_trigger(struct event_trigger_data *data, void *rec,
+                struct ring_buffer_event *event)
 {
        if (!tracing_is_on())
                return;
@@ -942,7 +952,8 @@ traceoff_trigger(struct event_trigger_data *data, void *rec)
 }
 
 static void
-traceoff_count_trigger(struct event_trigger_data *data, void *rec)
+traceoff_count_trigger(struct event_trigger_data *data, void *rec,
+                      struct ring_buffer_event *event)
 {
        if (!tracing_is_on())
                return;
@@ -1039,13 +1050,15 @@ static struct event_command trigger_traceoff_cmd = {
 
 #ifdef CONFIG_TRACER_SNAPSHOT
 static void
-snapshot_trigger(struct event_trigger_data *data, void *rec)
+snapshot_trigger(struct event_trigger_data *data, void *rec,
+                struct ring_buffer_event *event)
 {
        tracing_snapshot();
 }
 
 static void
-snapshot_count_trigger(struct event_trigger_data *data, void *rec)
+snapshot_count_trigger(struct event_trigger_data *data, void *rec,
+                      struct ring_buffer_event *event)
 {
        if (!data->count)
                return;
@@ -1053,7 +1066,7 @@ snapshot_count_trigger(struct event_trigger_data *data, void *rec)
        if (data->count != -1)
                (data->count)--;
 
-       snapshot_trigger(data, rec);
+       snapshot_trigger(data, rec, event);
 }
 
 static int
@@ -1141,13 +1154,15 @@ static __init int register_trigger_snapshot_cmd(void) { return 0; }
 #endif
 
 static void
-stacktrace_trigger(struct event_trigger_data *data, void *rec)
+stacktrace_trigger(struct event_trigger_data *data, void *rec,
+                  struct ring_buffer_event *event)
 {
        trace_dump_stack(STACK_SKIP);
 }
 
 static void
-stacktrace_count_trigger(struct event_trigger_data *data, void *rec)
+stacktrace_count_trigger(struct event_trigger_data *data, void *rec,
+                        struct ring_buffer_event *event)
 {
        if (!data->count)
                return;
@@ -1155,7 +1170,7 @@ stacktrace_count_trigger(struct event_trigger_data *data, void *rec)
        if (data->count != -1)
                (data->count)--;
 
-       stacktrace_trigger(data, rec);
+       stacktrace_trigger(data, rec, event);
 }
 
 static int
@@ -1217,7 +1232,8 @@ static __init void unregister_trigger_traceon_traceoff_cmds(void)
 }
 
 static void
-event_enable_trigger(struct event_trigger_data *data, void *rec)
+event_enable_trigger(struct event_trigger_data *data, void *rec,
+                    struct ring_buffer_event *event)
 {
        struct enable_trigger_data *enable_data = data->private_data;
 
@@ -1228,7 +1244,8 @@ event_enable_trigger(struct event_trigger_data *data, void *rec)
 }
 
 static void
-event_enable_count_trigger(struct event_trigger_data *data, void *rec)
+event_enable_count_trigger(struct event_trigger_data *data, void *rec,
+                          struct ring_buffer_event *event)
 {
        struct enable_trigger_data *enable_data = data->private_data;
 
@@ -1242,7 +1259,7 @@ event_enable_count_trigger(struct event_trigger_data *data, void *rec)
        if (data->count != -1)
                (data->count)--;
 
-       event_enable_trigger(data, rec);
+       event_enable_trigger(data, rec, event);
 }
 
 int event_enable_trigger_print(struct seq_file *m,
index 07e75344725ba254f5c42a314659142f0fd48528..5cadb1b8b5fe6fd8f71d213581522bdb0d03ecf8 100644 (file)
@@ -66,6 +66,73 @@ u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i)
        return (u64)atomic64_read(&elt->fields[i].sum);
 }
 
+/**
+ * tracing_map_set_var - Assign a tracing_map_elt's variable field
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ * @n: The value to assign
+ *
+ * Assign n to variable i associated with the specified tracing_map_elt
+ * instance.  The index i is the index returned by the call to
+ * tracing_map_add_var() when the tracing map was set up.
+ */
+void tracing_map_set_var(struct tracing_map_elt *elt, unsigned int i, u64 n)
+{
+       atomic64_set(&elt->vars[i], n);
+       elt->var_set[i] = true;
+}
+
+/**
+ * tracing_map_var_set - Return whether or not a variable has been set
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ *
+ * Return true if the variable has been set, false otherwise.  The
+ * index i is the index returned by the call to tracing_map_add_var()
+ * when the tracing map was set up.
+ */
+bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i)
+{
+       return elt->var_set[i];
+}
+
+/**
+ * tracing_map_read_var - Return the value of a tracing_map_elt's variable field
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ *
+ * Retrieve the value of the variable i associated with the specified
+ * tracing_map_elt instance.  The index i is the index returned by the
+ * call to tracing_map_add_var() when the tracing map was set
+ * up.
+ *
+ * Return: The variable value associated with field i for elt.
+ */
+u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i)
+{
+       return (u64)atomic64_read(&elt->vars[i]);
+}
+
+/**
+ * tracing_map_read_var_once - Return and reset a tracing_map_elt's variable field
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ *
+ * Retrieve the value of the variable i associated with the specified
+ * tracing_map_elt instance, and reset the variable to the 'not set'
+ * state.  The index i is the index returned by the call to
+ * tracing_map_add_var() when the tracing map was set up.  The reset
+ * essentially makes the variable a read-once variable if it's only
+ * accessed using this function.
+ *
+ * Return: The variable value associated with field i for elt.
+ */
+u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i)
+{
+       elt->var_set[i] = false;
+       return (u64)atomic64_read(&elt->vars[i]);
+}
+
 int tracing_map_cmp_string(void *val_a, void *val_b)
 {
        char *a = val_a;
@@ -170,6 +237,28 @@ int tracing_map_add_sum_field(struct tracing_map *map)
        return tracing_map_add_field(map, tracing_map_cmp_atomic64);
 }
 
+/**
+ * tracing_map_add_var - Add a field describing a tracing_map var
+ * @map: The tracing_map
+ *
+ * Add a var to the map and return the index identifying it in the map
+ * and associated tracing_map_elts.  This is the index used for
+ * instance to update a var for a particular tracing_map_elt using
+ * tracing_map_update_var() or reading it via tracing_map_read_var().
+ *
+ * Return: The index identifying the var in the map and associated
+ * tracing_map_elts, or -EINVAL on error.
+ */
+int tracing_map_add_var(struct tracing_map *map)
+{
+       int ret = -EINVAL;
+
+       if (map->n_vars < TRACING_MAP_VARS_MAX)
+               ret = map->n_vars++;
+
+       return ret;
+}
+
 /**
  * tracing_map_add_key_field - Add a field describing a tracing_map key
  * @map: The tracing_map
@@ -280,6 +369,11 @@ static void tracing_map_elt_clear(struct tracing_map_elt *elt)
                if (elt->fields[i].cmp_fn == tracing_map_cmp_atomic64)
                        atomic64_set(&elt->fields[i].sum, 0);
 
+       for (i = 0; i < elt->map->n_vars; i++) {
+               atomic64_set(&elt->vars[i], 0);
+               elt->var_set[i] = false;
+       }
+
        if (elt->map->ops && elt->map->ops->elt_clear)
                elt->map->ops->elt_clear(elt);
 }
@@ -306,6 +400,8 @@ static void tracing_map_elt_free(struct tracing_map_elt *elt)
        if (elt->map->ops && elt->map->ops->elt_free)
                elt->map->ops->elt_free(elt);
        kfree(elt->fields);
+       kfree(elt->vars);
+       kfree(elt->var_set);
        kfree(elt->key);
        kfree(elt);
 }
@@ -333,6 +429,18 @@ static struct tracing_map_elt *tracing_map_elt_alloc(struct tracing_map *map)
                goto free;
        }
 
+       elt->vars = kcalloc(map->n_vars, sizeof(*elt->vars), GFP_KERNEL);
+       if (!elt->vars) {
+               err = -ENOMEM;
+               goto free;
+       }
+
+       elt->var_set = kcalloc(map->n_vars, sizeof(*elt->var_set), GFP_KERNEL);
+       if (!elt->var_set) {
+               err = -ENOMEM;
+               goto free;
+       }
+
        tracing_map_elt_init_fields(elt);
 
        if (map->ops && map->ops->elt_alloc) {
@@ -414,7 +522,9 @@ static inline struct tracing_map_elt *
 __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only)
 {
        u32 idx, key_hash, test_key;
+       int dup_try = 0;
        struct tracing_map_entry *entry;
+       struct tracing_map_elt *val;
 
        key_hash = jhash(key, map->key_size, 0);
        if (key_hash == 0)
@@ -426,11 +536,33 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only)
                entry = TRACING_MAP_ENTRY(map->map, idx);
                test_key = entry->key;
 
-               if (test_key && test_key == key_hash && entry->val &&
-                   keys_match(key, entry->val->key, map->key_size)) {
-                       if (!lookup_only)
-                               atomic64_inc(&map->hits);
-                       return entry->val;
+               if (test_key && test_key == key_hash) {
+                       val = READ_ONCE(entry->val);
+                       if (val &&
+                           keys_match(key, val->key, map->key_size)) {
+                               if (!lookup_only)
+                                       atomic64_inc(&map->hits);
+                               return val;
+                       } else if (unlikely(!val)) {
+                               /*
+                                * The key is present. But, val (pointer to elt
+                                * struct) is still NULL. which means some other
+                                * thread is in the process of inserting an
+                                * element.
+                                *
+                                * On top of that, it's key_hash is same as the
+                                * one being inserted right now. So, it's
+                                * possible that the element has the same
+                                * key as well.
+                                */
+
+                               dup_try++;
+                               if (dup_try > map->map_size) {
+                                       atomic64_inc(&map->drops);
+                                       break;
+                               }
+                               continue;
+                       }
                }
 
                if (!test_key) {
@@ -452,6 +584,13 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only)
                                atomic64_inc(&map->hits);
 
                                return entry->val;
+                       } else {
+                               /*
+                                * cmpxchg() failed. Loop around once
+                                * more to check what key was inserted.
+                                */
+                               dup_try++;
+                               continue;
                        }
                }
 
@@ -816,67 +955,15 @@ create_sort_entry(void *key, struct tracing_map_elt *elt)
        return sort_entry;
 }
 
-static struct tracing_map_elt *copy_elt(struct tracing_map_elt *elt)
-{
-       struct tracing_map_elt *dup_elt;
-       unsigned int i;
-
-       dup_elt = tracing_map_elt_alloc(elt->map);
-       if (IS_ERR(dup_elt))
-               return NULL;
-
-       if (elt->map->ops && elt->map->ops->elt_copy)
-               elt->map->ops->elt_copy(dup_elt, elt);
-
-       dup_elt->private_data = elt->private_data;
-       memcpy(dup_elt->key, elt->key, elt->map->key_size);
-
-       for (i = 0; i < elt->map->n_fields; i++) {
-               atomic64_set(&dup_elt->fields[i].sum,
-                            atomic64_read(&elt->fields[i].sum));
-               dup_elt->fields[i].cmp_fn = elt->fields[i].cmp_fn;
-       }
-
-       return dup_elt;
-}
-
-static int merge_dup(struct tracing_map_sort_entry **sort_entries,
-                    unsigned int target, unsigned int dup)
-{
-       struct tracing_map_elt *target_elt, *elt;
-       bool first_dup = (target - dup) == 1;
-       int i;
-
-       if (first_dup) {
-               elt = sort_entries[target]->elt;
-               target_elt = copy_elt(elt);
-               if (!target_elt)
-                       return -ENOMEM;
-               sort_entries[target]->elt = target_elt;
-               sort_entries[target]->elt_copied = true;
-       } else
-               target_elt = sort_entries[target]->elt;
-
-       elt = sort_entries[dup]->elt;
-
-       for (i = 0; i < elt->map->n_fields; i++)
-               atomic64_add(atomic64_read(&elt->fields[i].sum),
-                            &target_elt->fields[i].sum);
-
-       sort_entries[dup]->dup = true;
-
-       return 0;
-}
-
-static int merge_dups(struct tracing_map_sort_entry **sort_entries,
+static void detect_dups(struct tracing_map_sort_entry **sort_entries,
                      int n_entries, unsigned int key_size)
 {
        unsigned int dups = 0, total_dups = 0;
-       int err, i, j;
+       int i;
        void *key;
 
        if (n_entries < 2)
-               return total_dups;
+               return;
 
        sort(sort_entries, n_entries, sizeof(struct tracing_map_sort_entry *),
             (int (*)(const void *, const void *))cmp_entries_dup, NULL);
@@ -885,30 +972,14 @@ static int merge_dups(struct tracing_map_sort_entry **sort_entries,
        for (i = 1; i < n_entries; i++) {
                if (!memcmp(sort_entries[i]->key, key, key_size)) {
                        dups++; total_dups++;
-                       err = merge_dup(sort_entries, i - dups, i);
-                       if (err)
-                               return err;
                        continue;
                }
                key = sort_entries[i]->key;
                dups = 0;
        }
 
-       if (!total_dups)
-               return total_dups;
-
-       for (i = 0, j = 0; i < n_entries; i++) {
-               if (!sort_entries[i]->dup) {
-                       sort_entries[j] = sort_entries[i];
-                       if (j++ != i)
-                               sort_entries[i] = NULL;
-               } else {
-                       destroy_sort_entry(sort_entries[i]);
-                       sort_entries[i] = NULL;
-               }
-       }
-
-       return total_dups;
+       WARN_ONCE(total_dups > 0,
+                 "Duplicates detected: %d\n", total_dups);
 }
 
 static bool is_key(struct tracing_map *map, unsigned int field_idx)
@@ -1034,10 +1105,7 @@ int tracing_map_sort_entries(struct tracing_map *map,
                return 1;
        }
 
-       ret = merge_dups(entries, n_entries, map->key_size);
-       if (ret < 0)
-               goto free;
-       n_entries -= ret;
+       detect_dups(entries, n_entries, map->key_size);
 
        if (is_key(map, sort_keys[0].field_idx))
                cmp_entries_fn = cmp_entries_key;
index 5b5bbf8ae550dfe2c7d6b08496d1a8327b0d6a10..053eb92b2d31309924a1f3fd2c068935aedaf0c4 100644 (file)
@@ -10,6 +10,7 @@
 #define TRACING_MAP_VALS_MAX           3
 #define TRACING_MAP_FIELDS_MAX         (TRACING_MAP_KEYS_MAX + \
                                         TRACING_MAP_VALS_MAX)
+#define TRACING_MAP_VARS_MAX           16
 #define TRACING_MAP_SORT_KEYS_MAX      2
 
 typedef int (*tracing_map_cmp_fn_t) (void *val_a, void *val_b);
@@ -137,6 +138,8 @@ struct tracing_map_field {
 struct tracing_map_elt {
        struct tracing_map              *map;
        struct tracing_map_field        *fields;
+       atomic64_t                      *vars;
+       bool                            *var_set;
        void                            *key;
        void                            *private_data;
 };
@@ -192,6 +195,7 @@ struct tracing_map {
        int                             key_idx[TRACING_MAP_KEYS_MAX];
        unsigned int                    n_keys;
        struct tracing_map_sort_key     sort_key;
+       unsigned int                    n_vars;
        atomic64_t                      hits;
        atomic64_t                      drops;
 };
@@ -215,11 +219,6 @@ struct tracing_map {
  *     Element allocation occurs before tracing begins, when the
  *     tracing_map_init() call is made by client code.
  *
- * @elt_copy: At certain points in the lifetime of an element, it may
- *     need to be copied.  The copy should include a copy of the
- *     client-allocated data, which can be copied into the 'to'
- *     element from the 'from' element.
- *
  * @elt_free: When a tracing_map_elt is freed, this function is called
  *     and allows client-allocated per-element data to be freed.
  *
@@ -233,8 +232,6 @@ struct tracing_map {
  */
 struct tracing_map_ops {
        int                     (*elt_alloc)(struct tracing_map_elt *elt);
-       void                    (*elt_copy)(struct tracing_map_elt *to,
-                                           struct tracing_map_elt *from);
        void                    (*elt_free)(struct tracing_map_elt *elt);
        void                    (*elt_clear)(struct tracing_map_elt *elt);
        void                    (*elt_init)(struct tracing_map_elt *elt);
@@ -248,6 +245,7 @@ tracing_map_create(unsigned int map_bits,
 extern int tracing_map_init(struct tracing_map *map);
 
 extern int tracing_map_add_sum_field(struct tracing_map *map);
+extern int tracing_map_add_var(struct tracing_map *map);
 extern int tracing_map_add_key_field(struct tracing_map *map,
                                     unsigned int offset,
                                     tracing_map_cmp_fn_t cmp_fn);
@@ -267,7 +265,13 @@ extern int tracing_map_cmp_none(void *val_a, void *val_b);
 
 extern void tracing_map_update_sum(struct tracing_map_elt *elt,
                                   unsigned int i, u64 n);
+extern void tracing_map_set_var(struct tracing_map_elt *elt,
+                               unsigned int i, u64 n);
+extern bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i);
 extern u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i);
+extern u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i);
+extern u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i);
+
 extern void tracing_map_set_field_descr(struct tracing_map *map,
                                        unsigned int i,
                                        unsigned int key_offset,
index 913fe4336d2b75a0d7cd6697d3bcdf5436b9c5ed..dcd6be1996fe7ffcdb2dd0d15d516ffaea52df4b 100644 (file)
@@ -19,6 +19,8 @@
 #include <linux/proc_ns.h>
 #include <linux/sched/task.h>
 
+static struct kmem_cache *uts_ns_cache __ro_after_init;
+
 static struct ucounts *inc_uts_namespaces(struct user_namespace *ns)
 {
        return inc_ucount(ns, current_euid(), UCOUNT_UTS_NAMESPACES);
@@ -33,7 +35,7 @@ static struct uts_namespace *create_uts_ns(void)
 {
        struct uts_namespace *uts_ns;
 
-       uts_ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
+       uts_ns = kmem_cache_alloc(uts_ns_cache, GFP_KERNEL);
        if (uts_ns)
                kref_init(&uts_ns->kref);
        return uts_ns;
@@ -42,7 +44,7 @@ static struct uts_namespace *create_uts_ns(void)
 /*
  * Clone a new ns copying an original utsname, setting refcount to 1
  * @old_ns: namespace to clone
- * Return ERR_PTR(-ENOMEM) on error (failure to kmalloc), new ns otherwise
+ * Return ERR_PTR(-ENOMEM) on error (failure to allocate), new ns otherwise
  */
 static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
                                          struct uts_namespace *old_ns)
@@ -75,7 +77,7 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
        return ns;
 
 fail_free:
-       kfree(ns);
+       kmem_cache_free(uts_ns_cache, ns);
 fail_dec:
        dec_uts_namespaces(ucounts);
 fail:
@@ -113,7 +115,7 @@ void free_uts_ns(struct kref *kref)
        dec_uts_namespaces(ns->ucounts);
        put_user_ns(ns->user_ns);
        ns_free_inum(&ns->ns);
-       kfree(ns);
+       kmem_cache_free(uts_ns_cache, ns);
 }
 
 static inline struct uts_namespace *to_uts_ns(struct ns_common *ns)
@@ -169,3 +171,13 @@ const struct proc_ns_operations utsns_operations = {
        .install        = utsns_install,
        .owner          = utsns_owner,
 };
+
+void __init uts_ns_init(void)
+{
+       uts_ns_cache = kmem_cache_create_usercopy(
+                       "uts_namespace", sizeof(struct uts_namespace), 0,
+                       SLAB_PANIC|SLAB_ACCOUNT,
+                       offsetof(struct uts_namespace, name),
+                       sizeof_field(struct uts_namespace, name),
+                       NULL);
+}
index 51c6bf0d93c61328b40ebc64b209b1b52a19f56d..c40c7b734cd17efda88aa13c148cd56999684c8c 100644 (file)
@@ -800,6 +800,30 @@ config SOFTLOCKUP_DETECTOR
          chance to run.  The current stack trace is displayed upon
          detection and the system will stay locked up.
 
+config BOOTPARAM_SOFTLOCKUP_PANIC
+       bool "Panic (Reboot) On Soft Lockups"
+       depends on SOFTLOCKUP_DETECTOR
+       help
+         Say Y here to enable the kernel to panic on "soft lockups",
+         which are bugs that cause the kernel to loop in kernel
+         mode for more than 20 seconds (configurable using the watchdog_thresh
+         sysctl), without giving other tasks a chance to run.
+
+         The panic can be used in combination with panic_timeout,
+         to cause the system to reboot automatically after a
+         lockup has been detected. This feature is useful for
+         high-availability systems that have uptime guarantees and
+         where a lockup must be resolved ASAP.
+
+         Say N if unsure.
+
+config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
+       int
+       depends on SOFTLOCKUP_DETECTOR
+       range 0 1
+       default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
+       default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
+
 config HARDLOCKUP_DETECTOR_PERF
        bool
        select SOFTLOCKUP_DETECTOR
@@ -849,30 +873,6 @@ config BOOTPARAM_HARDLOCKUP_PANIC_VALUE
        default 0 if !BOOTPARAM_HARDLOCKUP_PANIC
        default 1 if BOOTPARAM_HARDLOCKUP_PANIC
 
-config BOOTPARAM_SOFTLOCKUP_PANIC
-       bool "Panic (Reboot) On Soft Lockups"
-       depends on SOFTLOCKUP_DETECTOR
-       help
-         Say Y here to enable the kernel to panic on "soft lockups",
-         which are bugs that cause the kernel to loop in kernel
-         mode for more than 20 seconds (configurable using the watchdog_thresh
-         sysctl), without giving other tasks a chance to run.
-
-         The panic can be used in combination with panic_timeout,
-         to cause the system to reboot automatically after a
-         lockup has been detected. This feature is useful for
-         high-availability systems that have uptime guarantees and
-         where a lockup must be resolved ASAP.
-
-         Say N if unsure.
-
-config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
-       int
-       depends on SOFTLOCKUP_DETECTOR
-       range 0 1
-       default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
-       default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
-
 config DETECT_HUNG_TASK
        bool "Detect Hung Tasks"
        depends on DEBUG_KERNEL
index a669c193b8785b51bc9b0876ef7765b66b61062a..19d42ea75ec225d385734e5212680df0ef9609c6 100644 (file)
@@ -46,3 +46,10 @@ config UBSAN_NULL
        help
          This option enables detection of memory accesses via a
          null pointer.
+
+config TEST_UBSAN
+       tristate "Module for testing for undefined behavior detection"
+       depends on m && UBSAN
+       help
+         This is a test module for UBSAN.
+         It triggers various undefined behaviors and detects them.
index 8fc0d3a9b34f7b61d84d57f6e5ef497c5622192a..ce20696d5a92e7dcb0c22701fc83754ec77fe77d 100644 (file)
@@ -53,6 +53,9 @@ obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
 obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
 obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
 obj-$(CONFIG_TEST_KASAN) += test_kasan.o
+CFLAGS_test_kasan.o += -fno-builtin
+obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o
+UBSAN_SANITIZE_test_ubsan.o := y
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o
 obj-$(CONFIG_TEST_LKM) += test_module.o
index a34db8d276676782ca8d45f286827e7074e26a37..5d5424b51b746fe64a38bde879d985e6e6780284 100644 (file)
@@ -21,13 +21,13 @@ bool __list_add_valid(struct list_head *new, struct list_head *prev,
                      struct list_head *next)
 {
        if (CHECK_DATA_CORRUPTION(next->prev != prev,
-                       "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
+                       "list_add corruption. next->prev should be prev (%px), but was %px. (next=%px).\n",
                        prev, next->prev, next) ||
            CHECK_DATA_CORRUPTION(prev->next != next,
-                       "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
+                       "list_add corruption. prev->next should be next (%px), but was %px. (prev=%px).\n",
                        next, prev->next, prev) ||
            CHECK_DATA_CORRUPTION(new == prev || new == next,
-                       "list_add double add: new=%p, prev=%p, next=%p.\n",
+                       "list_add double add: new=%px, prev=%px, next=%px.\n",
                        new, prev, next))
                return false;
 
@@ -43,16 +43,16 @@ bool __list_del_entry_valid(struct list_head *entry)
        next = entry->next;
 
        if (CHECK_DATA_CORRUPTION(next == LIST_POISON1,
-                       "list_del corruption, %p->next is LIST_POISON1 (%p)\n",
+                       "list_del corruption, %px->next is LIST_POISON1 (%px)\n",
                        entry, LIST_POISON1) ||
            CHECK_DATA_CORRUPTION(prev == LIST_POISON2,
-                       "list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
+                       "list_del corruption, %px->prev is LIST_POISON2 (%px)\n",
                        entry, LIST_POISON2) ||
            CHECK_DATA_CORRUPTION(prev->next != entry,
-                       "list_del corruption. prev->next should be %p, but was %p\n",
+                       "list_del corruption. prev->next should be %px, but was %px\n",
                        entry, prev->next) ||
            CHECK_DATA_CORRUPTION(next->prev != entry,
-                       "list_del corruption. next->prev should be %p, but was %p\n",
+                       "list_del corruption. next->prev should be %px, but was %px\n",
                        entry, next->prev))
                return false;
 
index 8e00138d593fd3acf09716e7edf6c769c9ceee5b..da9e10c827df554040b8c5eac71badbd2b588a95 100644 (file)
@@ -146,7 +146,7 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
 
 static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
 {
-       return root->gfp_mask & __GFP_BITS_MASK;
+       return root->gfp_mask & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
 }
 
 static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
@@ -2285,6 +2285,7 @@ void __init radix_tree_init(void)
        int ret;
 
        BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
+       BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
        radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
                        sizeof(struct radix_tree_node), 0,
                        SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
index 47aeb04c1997bf45c943985cfda6f08ea716b92d..de7cc540450fdce59b0b96704431d42dc0c149f7 100644 (file)
@@ -719,7 +719,7 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
                goto out_warn;
 
        *dma_handle = __phys_to_dma(dev, phys_addr);
-       if (dma_coherent_ok(dev, *dma_handle, size))
+       if (!dma_coherent_ok(dev, *dma_handle, size))
                goto out_unmap;
 
        memset(phys_to_virt(phys_addr), 0, size);
index 413367cf569ea69bb9369ccebed686eca337d3b4..de16f7869fb19b72b6db134fbc13f8d3b4495f90 100644 (file)
@@ -296,15 +296,17 @@ static void __init test_bitmap_parselist(void)
        }
 }
 
+#define EXP_BYTES      (sizeof(exp) * 8)
+
 static void __init test_bitmap_arr32(void)
 {
-       unsigned int nbits, next_bit, len = sizeof(exp) * 8;
+       unsigned int nbits, next_bit;
        u32 arr[sizeof(exp) / 4];
-       DECLARE_BITMAP(bmap2, len);
+       DECLARE_BITMAP(bmap2, EXP_BYTES);
 
        memset(arr, 0xa5, sizeof(arr));
 
-       for (nbits = 0; nbits < len; ++nbits) {
+       for (nbits = 0; nbits < EXP_BYTES; ++nbits) {
                bitmap_to_arr32(arr, exp, nbits);
                bitmap_from_arr32(bmap2, arr, nbits);
                expect_eq_bitmap(bmap2, exp, nbits);
@@ -316,7 +318,7 @@ static void __init test_bitmap_arr32(void)
                                " tail is not safely cleared: %d\n",
                                nbits, next_bit);
 
-               if (nbits < len - 32)
+               if (nbits < EXP_BYTES - 32)
                        expect_eq_uint(arr[DIV_ROUND_UP(nbits, 32)],
                                                                0xa5a5a5a5);
        }
index 98854a64b014dbbf761deb2c4fcda1512a456b68..ec657105edbf08e2c87ea1acc8288e6e10bf6e6b 100644 (file)
@@ -567,7 +567,15 @@ static noinline void __init kmem_cache_invalid_free(void)
                return;
        }
 
+       /* Trigger invalid free, the object doesn't get freed */
        kmem_cache_free(cache, p + 1);
+
+       /*
+        * Properly free the object to prevent the "Objects remaining in
+        * test_cache on __kmem_cache_shutdown" BUG failure.
+        */
+       kmem_cache_free(cache, p);
+
        kmem_cache_destroy(cache);
 }
 
diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c
new file mode 100644 (file)
index 0000000..280f497
--- /dev/null
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+typedef void(*test_ubsan_fp)(void);
+
+static void test_ubsan_add_overflow(void)
+{
+       volatile int val = INT_MAX;
+
+       val += 2;
+}
+
+static void test_ubsan_sub_overflow(void)
+{
+       volatile int val = INT_MIN;
+       volatile int val2 = 2;
+
+       val -= val2;
+}
+
+static void test_ubsan_mul_overflow(void)
+{
+       volatile int val = INT_MAX / 2;
+
+       val *= 3;
+}
+
+static void test_ubsan_negate_overflow(void)
+{
+       volatile int val = INT_MIN;
+
+       val = -val;
+}
+
+static void test_ubsan_divrem_overflow(void)
+{
+       volatile int val = 16;
+       volatile int val2 = 0;
+
+       val /= val2;
+}
+
+static void test_ubsan_vla_bound_not_positive(void)
+{
+       volatile int size = -1;
+       char buf[size];
+
+       (void)buf;
+}
+
+static void test_ubsan_shift_out_of_bounds(void)
+{
+       volatile int val = -1;
+       int val2 = 10;
+
+       val2 <<= val;
+}
+
+static void test_ubsan_out_of_bounds(void)
+{
+       volatile int i = 4, j = 5;
+       volatile int arr[i];
+
+       arr[j] = i;
+}
+
+static void test_ubsan_load_invalid_value(void)
+{
+       volatile char *dst, *src;
+       bool val, val2, *ptr;
+       char c = 4;
+
+       dst = (char *)&val;
+       src = &c;
+       *dst = *src;
+
+       ptr = &val2;
+       val2 = val;
+}
+
+static void test_ubsan_null_ptr_deref(void)
+{
+       volatile int *ptr = NULL;
+       int val;
+
+       val = *ptr;
+}
+
+static void test_ubsan_misaligned_access(void)
+{
+       volatile char arr[5] __aligned(4) = {1, 2, 3, 4, 5};
+       volatile int *ptr, val = 6;
+
+       ptr = (int *)(arr + 1);
+       *ptr = val;
+}
+
+static void test_ubsan_object_size_mismatch(void)
+{
+       /* "((aligned(8)))" keeps this from being misaligned for ptr-access. */
+       volatile int val __aligned(8) = 4;
+       volatile long long *ptr, val2;
+
+       ptr = (long long *)&val;
+       val2 = *ptr;
+}
+
+static const test_ubsan_fp test_ubsan_array[] = {
+       test_ubsan_add_overflow,
+       test_ubsan_sub_overflow,
+       test_ubsan_mul_overflow,
+       test_ubsan_negate_overflow,
+       test_ubsan_divrem_overflow,
+       test_ubsan_vla_bound_not_positive,
+       test_ubsan_shift_out_of_bounds,
+       test_ubsan_out_of_bounds,
+       test_ubsan_load_invalid_value,
+       //test_ubsan_null_ptr_deref, /* exclude it because there is a crash */
+       test_ubsan_misaligned_access,
+       test_ubsan_object_size_mismatch,
+};
+
+static int __init test_ubsan_init(void)
+{
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(test_ubsan_array); i++)
+               test_ubsan_array[i]();
+
+       (void)test_ubsan_null_ptr_deref; /* to avoid unused-function warning */
+       return 0;
+}
+module_init(test_ubsan_init);
+
+static void __exit test_ubsan_exit(void)
+{
+       /* do nothing */
+}
+module_exit(test_ubsan_exit);
+
+MODULE_AUTHOR("Jinbum Park <jinb.park7@gmail.com>");
+MODULE_LICENSE("GPL v2");
index d7a708f82559ca38e768cb93fc0febb999711e4c..30c0cb8cc9bce78089cb6ad48bcb6b3d5d02e6b2 100644 (file)
@@ -336,7 +336,7 @@ char *put_dec(char *buf, unsigned long long n)
  *
  * If speed is not important, use snprintf(). It's easy to read the code.
  */
-int num_to_str(char *buf, int size, unsigned long long num)
+int num_to_str(char *buf, int size, unsigned long long num, unsigned int width)
 {
        /* put_dec requires 2-byte alignment of the buffer. */
        char tmp[sizeof(num) * 3] __aligned(2);
@@ -350,11 +350,21 @@ int num_to_str(char *buf, int size, unsigned long long num)
                len = put_dec(tmp, num) - tmp;
        }
 
-       if (len > size)
+       if (len > size || width > size)
                return 0;
+
+       if (width > len) {
+               width = width - len;
+               for (idx = 0; idx < width; idx++)
+                       buf[idx] = ' ';
+       } else {
+               width = 0;
+       }
+
        for (idx = 0; idx < len; ++idx)
-               buf[idx] = tmp[len - idx - 1];
-       return len;
+               buf[idx + width] = tmp[len - idx - 1];
+
+       return len + width;
 }
 
 #define SIGN   1               /* unsigned/signed, must be 1 */
@@ -2591,6 +2601,8 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args)
                        case 's':
                        case 'F':
                        case 'f':
+                       case 'x':
+                       case 'K':
                                save_arg(void *);
                                break;
                        default:
@@ -2765,6 +2777,8 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
                        case 's':
                        case 'F':
                        case 'f':
+                       case 'x':
+                       case 'K':
                                process = true;
                                break;
                        default:
index 08b9aab631ab35616b6a8205495a713ef57102ad..023190c69dce71c528cccb7498e7456d41f674d3 100644 (file)
@@ -1020,23 +1020,18 @@ EXPORT_SYMBOL(congestion_wait);
 
 /**
  * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a pgdat to complete writes
- * @pgdat: A pgdat to check if it is heavily congested
  * @sync: SYNC or ASYNC IO
  * @timeout: timeout in jiffies
  *
- * In the event of a congested backing_dev (any backing_dev) and the given
- * @pgdat has experienced recent congestion, this waits for up to @timeout
- * jiffies for either a BDI to exit congestion of the given @sync queue
- * or a write to complete.
- *
- * In the absence of pgdat congestion, cond_resched() is called to yield
- * the processor if necessary but otherwise does not sleep.
+ * In the event of a congested backing_dev (any backing_dev) this waits
+ * for up to @timeout jiffies for either a BDI to exit congestion of the
+ * given @sync queue or a write to complete.
  *
  * The return value is 0 if the sleep is for the full timeout. Otherwise,
  * it is the number of jiffies that were still remaining when the function
  * returned. return_value == timeout implies the function did not sleep.
  */
-long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout)
+long wait_iff_congested(int sync, long timeout)
 {
        long ret;
        unsigned long start = jiffies;
@@ -1044,12 +1039,10 @@ long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout)
        wait_queue_head_t *wqh = &congestion_wqh[sync];
 
        /*
-        * If there is no congestion, or heavy congestion is not being
-        * encountered in the current pgdat, yield if necessary instead
+        * If there is no congestion, yield if necessary instead
         * of sleeping on the congestion queue
         */
-       if (atomic_read(&nr_wb_congested[sync]) == 0 ||
-           !test_bit(PGDAT_CONGESTED, &pgdat->flags)) {
+       if (atomic_read(&nr_wb_congested[sync]) == 0) {
                cond_resched();
 
                /* In case we scheduled, work out time remaining */
index 5809bbe360d7fb724a435309e6e693a98f9efbfb..aa40e6c7b042e95f5fb30a24cb05014c55ce9152 100644 (file)
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -39,6 +39,7 @@
 #include <trace/events/cma.h>
 
 #include "cma.h"
+#include "internal.h"
 
 struct cma cma_areas[MAX_CMA_AREAS];
 unsigned cma_area_count;
@@ -109,23 +110,25 @@ static int __init cma_activate_area(struct cma *cma)
        if (!cma->bitmap)
                return -ENOMEM;
 
-       WARN_ON_ONCE(!pfn_valid(pfn));
-       zone = page_zone(pfn_to_page(pfn));
-
        do {
                unsigned j;
 
                base_pfn = pfn;
+               if (!pfn_valid(base_pfn))
+                       goto err;
+
+               zone = page_zone(pfn_to_page(base_pfn));
                for (j = pageblock_nr_pages; j; --j, pfn++) {
-                       WARN_ON_ONCE(!pfn_valid(pfn));
+                       if (!pfn_valid(pfn))
+                               goto err;
+
                        /*
-                        * alloc_contig_range requires the pfn range
-                        * specified to be in the same zone. Make this
-                        * simple by forcing the entire CMA resv range
-                        * to be in the same zone.
+                        * In init_cma_reserved_pageblock(), present_pages
+                        * is adjusted with assumption that all pages in
+                        * the pageblock come from a single zone.
                         */
                        if (page_zone(pfn_to_page(pfn)) != zone)
-                               goto not_in_zone;
+                               goto err;
                }
                init_cma_reserved_pageblock(pfn_to_page(base_pfn));
        } while (--i);
@@ -139,7 +142,7 @@ static int __init cma_activate_area(struct cma *cma)
 
        return 0;
 
-not_in_zone:
+err:
        pr_err("CMA area %s could not be activated\n", cma->name);
        kfree(cma->bitmap);
        cma->count = 0;
@@ -149,6 +152,41 @@ not_in_zone:
 static int __init cma_init_reserved_areas(void)
 {
        int i;
+       struct zone *zone;
+       pg_data_t *pgdat;
+
+       if (!cma_area_count)
+               return 0;
+
+       for_each_online_pgdat(pgdat) {
+               unsigned long start_pfn = UINT_MAX, end_pfn = 0;
+
+               zone = &pgdat->node_zones[ZONE_MOVABLE];
+
+               /*
+                * In this case, we cannot adjust the zone range
+                * since it is now maximum node span and we don't
+                * know original zone range.
+                */
+               if (populated_zone(zone))
+                       continue;
+
+               for (i = 0; i < cma_area_count; i++) {
+                       if (pfn_to_nid(cma_areas[i].base_pfn) !=
+                               pgdat->node_id)
+                               continue;
+
+                       start_pfn = min(start_pfn, cma_areas[i].base_pfn);
+                       end_pfn = max(end_pfn, cma_areas[i].base_pfn +
+                                               cma_areas[i].count);
+               }
+
+               if (!end_pfn)
+                       continue;
+
+               zone->zone_start_pfn = start_pfn;
+               zone->spanned_pages = end_pfn - start_pfn;
+       }
 
        for (i = 0; i < cma_area_count; i++) {
                int ret = cma_activate_area(&cma_areas[i]);
@@ -157,9 +195,32 @@ static int __init cma_init_reserved_areas(void)
                        return ret;
        }
 
+       /*
+        * Reserved pages for ZONE_MOVABLE are now activated and
+        * this would change ZONE_MOVABLE's managed page counter and
+        * the other zones' present counter. We need to re-calculate
+        * various zone information that depends on this initialization.
+        */
+       build_all_zonelists(NULL);
+       for_each_populated_zone(zone) {
+               if (zone_idx(zone) == ZONE_MOVABLE) {
+                       zone_pcp_reset(zone);
+                       setup_zone_pageset(zone);
+               } else
+                       zone_pcp_update(zone);
+
+               set_zone_contiguous(zone);
+       }
+
+       /*
+        * We need to re-init per zone wmark by calling
+        * init_per_zone_wmark_min() but doesn't call here because it is
+        * registered on core_initcall and it will be called later than us.
+        */
+
        return 0;
 }
-core_initcall(cma_init_reserved_areas);
+pure_initcall(cma_init_reserved_areas);
 
 /**
  * cma_init_reserved_mem() - create custom contiguous area from reserved memory
index 88d01a50a0151ec80112f81348ea3115d8ad57ab..028b7210a669636bf9ce58bdd67bde71cf23bc6c 100644 (file)
@@ -1166,8 +1166,7 @@ static void isolate_freepages(struct compact_control *cc)
  * from the isolated freelists in the block we are migrating to.
  */
 static struct page *compaction_alloc(struct page *migratepage,
-                                       unsigned long data,
-                                       int **result)
+                                       unsigned long data)
 {
        struct compact_control *cc = (struct compact_control *)data;
        struct page *freepage;
@@ -1451,14 +1450,12 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
         * if compaction succeeds.
         * For costly orders, we require low watermark instead of min for
         * compaction to proceed to increase its chances.
-        * ALLOC_CMA is used, as pages in CMA pageblocks are considered
-        * suitable migration targets
         */
        watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ?
                                low_wmark_pages(zone) : min_wmark_pages(zone);
        watermark += compact_gap(order);
        if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx,
-                                               ALLOC_CMA, wmark_target))
+                                               0, wmark_target))
                return COMPACT_SKIPPED;
 
        return COMPACT_CONTINUE;
index 693f62212a59a704dec99516d1d8a975b59bc7ee..ab77e19ab09c0f447e932c1d63c1261b481289e0 100644 (file)
@@ -66,7 +66,7 @@
  *  ->i_mmap_rwsem             (truncate_pagecache)
  *    ->private_lock           (__free_pte->__set_page_dirty_buffers)
  *      ->swap_lock            (exclusive_swap_page, others)
- *        ->mapping->tree_lock
+ *        ->i_pages lock
  *
  *  ->i_mutex
  *    ->i_mmap_rwsem           (truncate->unmap_mapping_range)
@@ -74,7 +74,7 @@
  *  ->mmap_sem
  *    ->i_mmap_rwsem
  *      ->page_table_lock or pte_lock  (various, mainly in memory.c)
- *        ->mapping->tree_lock (arch-dependent flush_dcache_mmap_lock)
+ *        ->i_pages lock       (arch-dependent flush_dcache_mmap_lock)
  *
  *  ->mmap_sem
  *    ->lock_page              (access_process_vm)
@@ -84,7 +84,7 @@
  *
  *  bdi->wb.list_lock
  *    sb_lock                  (fs/fs-writeback.c)
- *    ->mapping->tree_lock     (__sync_single_inode)
+ *    ->i_pages lock           (__sync_single_inode)
  *
  *  ->i_mmap_rwsem
  *    ->anon_vma.lock          (vma_adjust)
  *  ->page_table_lock or pte_lock
  *    ->swap_lock              (try_to_unmap_one)
  *    ->private_lock           (try_to_unmap_one)
- *    ->tree_lock              (try_to_unmap_one)
+ *    ->i_pages lock           (try_to_unmap_one)
  *    ->zone_lru_lock(zone)    (follow_page->mark_page_accessed)
  *    ->zone_lru_lock(zone)    (check_pte_range->isolate_lru_page)
  *    ->private_lock           (page_remove_rmap->set_page_dirty)
- *    ->tree_lock              (page_remove_rmap->set_page_dirty)
+ *    ->i_pages lock           (page_remove_rmap->set_page_dirty)
  *    bdi.wb->list_lock                (page_remove_rmap->set_page_dirty)
  *    ->inode->i_lock          (page_remove_rmap->set_page_dirty)
  *    ->memcg->move_lock       (page_remove_rmap->lock_page_memcg)
@@ -118,14 +118,15 @@ static int page_cache_tree_insert(struct address_space *mapping,
        void **slot;
        int error;
 
-       error = __radix_tree_create(&mapping->page_tree, page->index, 0,
+       error = __radix_tree_create(&mapping->i_pages, page->index, 0,
                                    &node, &slot);
        if (error)
                return error;
        if (*slot) {
                void *p;
 
-               p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+               p = radix_tree_deref_slot_protected(slot,
+                                                   &mapping->i_pages.xa_lock);
                if (!radix_tree_exceptional_entry(p))
                        return -EEXIST;
 
@@ -133,7 +134,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
                if (shadowp)
                        *shadowp = p;
        }
-       __radix_tree_replace(&mapping->page_tree, node, slot, page,
+       __radix_tree_replace(&mapping->i_pages, node, slot, page,
                             workingset_lookup_update(mapping));
        mapping->nrpages++;
        return 0;
@@ -155,13 +156,13 @@ static void page_cache_tree_delete(struct address_space *mapping,
                struct radix_tree_node *node;
                void **slot;
 
-               __radix_tree_lookup(&mapping->page_tree, page->index + i,
+               __radix_tree_lookup(&mapping->i_pages, page->index + i,
                                    &node, &slot);
 
                VM_BUG_ON_PAGE(!node && nr != 1, page);
 
-               radix_tree_clear_tags(&mapping->page_tree, node, slot);
-               __radix_tree_replace(&mapping->page_tree, node, slot, shadow,
+               radix_tree_clear_tags(&mapping->i_pages, node, slot);
+               __radix_tree_replace(&mapping->i_pages, node, slot, shadow,
                                workingset_lookup_update(mapping));
        }
 
@@ -253,7 +254,7 @@ static void unaccount_page_cache_page(struct address_space *mapping,
 /*
  * Delete a page from the page cache and free it. Caller has to make
  * sure the page is locked and that nobody else uses it - or that usage
- * is safe.  The caller must hold the mapping's tree_lock.
+ * is safe.  The caller must hold the i_pages lock.
  */
 void __delete_from_page_cache(struct page *page, void *shadow)
 {
@@ -296,9 +297,9 @@ void delete_from_page_cache(struct page *page)
        unsigned long flags;
 
        BUG_ON(!PageLocked(page));
-       spin_lock_irqsave(&mapping->tree_lock, flags);
+       xa_lock_irqsave(&mapping->i_pages, flags);
        __delete_from_page_cache(page, NULL);
-       spin_unlock_irqrestore(&mapping->tree_lock, flags);
+       xa_unlock_irqrestore(&mapping->i_pages, flags);
 
        page_cache_free_page(mapping, page);
 }
@@ -309,14 +310,14 @@ EXPORT_SYMBOL(delete_from_page_cache);
  * @mapping: the mapping to which pages belong
  * @pvec: pagevec with pages to delete
  *
- * The function walks over mapping->page_tree and removes pages passed in @pvec
- * from the radix tree. The function expects @pvec to be sorted by page index.
- * It tolerates holes in @pvec (radix tree entries at those indices are not
+ * The function walks over mapping->i_pages and removes pages passed in @pvec
+ * from the mapping. The function expects @pvec to be sorted by page index.
+ * It tolerates holes in @pvec (mapping entries at those indices are not
  * modified). The function expects only THP head pages to be present in the
- * @pvec and takes care to delete all corresponding tail pages from the radix
- * tree as well.
+ * @pvec and takes care to delete all corresponding tail pages from the
+ * mapping as well.
  *
- * The function expects mapping->tree_lock to be held.
+ * The function expects the i_pages lock to be held.
  */
 static void
 page_cache_tree_delete_batch(struct address_space *mapping,
@@ -330,11 +331,11 @@ page_cache_tree_delete_batch(struct address_space *mapping,
        pgoff_t start;
 
        start = pvec->pages[0]->index;
-       radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+       radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
                if (i >= pagevec_count(pvec) && !tail_pages)
                        break;
                page = radix_tree_deref_slot_protected(slot,
-                                                      &mapping->tree_lock);
+                                                      &mapping->i_pages.xa_lock);
                if (radix_tree_exceptional_entry(page))
                        continue;
                if (!tail_pages) {
@@ -357,8 +358,8 @@ page_cache_tree_delete_batch(struct address_space *mapping,
                } else {
                        tail_pages--;
                }
-               radix_tree_clear_tags(&mapping->page_tree, iter.node, slot);
-               __radix_tree_replace(&mapping->page_tree, iter.node, slot, NULL,
+               radix_tree_clear_tags(&mapping->i_pages, iter.node, slot);
+               __radix_tree_replace(&mapping->i_pages, iter.node, slot, NULL,
                                workingset_lookup_update(mapping));
                total_pages++;
        }
@@ -374,14 +375,14 @@ void delete_from_page_cache_batch(struct address_space *mapping,
        if (!pagevec_count(pvec))
                return;
 
-       spin_lock_irqsave(&mapping->tree_lock, flags);
+       xa_lock_irqsave(&mapping->i_pages, flags);
        for (i = 0; i < pagevec_count(pvec); i++) {
                trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);
 
                unaccount_page_cache_page(mapping, pvec->pages[i]);
        }
        page_cache_tree_delete_batch(mapping, pvec);
-       spin_unlock_irqrestore(&mapping->tree_lock, flags);
+       xa_unlock_irqrestore(&mapping->i_pages, flags);
 
        for (i = 0; i < pagevec_count(pvec); i++)
                page_cache_free_page(mapping, pvec->pages[i]);
@@ -798,7 +799,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
                new->mapping = mapping;
                new->index = offset;
 
-               spin_lock_irqsave(&mapping->tree_lock, flags);
+               xa_lock_irqsave(&mapping->i_pages, flags);
                __delete_from_page_cache(old, NULL);
                error = page_cache_tree_insert(mapping, new, NULL);
                BUG_ON(error);
@@ -810,7 +811,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
                        __inc_node_page_state(new, NR_FILE_PAGES);
                if (PageSwapBacked(new))
                        __inc_node_page_state(new, NR_SHMEM);
-               spin_unlock_irqrestore(&mapping->tree_lock, flags);
+               xa_unlock_irqrestore(&mapping->i_pages, flags);
                mem_cgroup_migrate(old, new);
                radix_tree_preload_end();
                if (freepage)
@@ -852,7 +853,7 @@ static int __add_to_page_cache_locked(struct page *page,
        page->mapping = mapping;
        page->index = offset;
 
-       spin_lock_irq(&mapping->tree_lock);
+       xa_lock_irq(&mapping->i_pages);
        error = page_cache_tree_insert(mapping, page, shadowp);
        radix_tree_preload_end();
        if (unlikely(error))
@@ -861,7 +862,7 @@ static int __add_to_page_cache_locked(struct page *page,
        /* hugetlb pages do not participate in page cache accounting. */
        if (!huge)
                __inc_node_page_state(page, NR_FILE_PAGES);
-       spin_unlock_irq(&mapping->tree_lock);
+       xa_unlock_irq(&mapping->i_pages);
        if (!huge)
                mem_cgroup_commit_charge(page, memcg, false, false);
        trace_mm_filemap_add_to_page_cache(page);
@@ -869,7 +870,7 @@ static int __add_to_page_cache_locked(struct page *page,
 err_insert:
        page->mapping = NULL;
        /* Leave page->index set: truncation relies upon it */
-       spin_unlock_irq(&mapping->tree_lock);
+       xa_unlock_irq(&mapping->i_pages);
        if (!huge)
                mem_cgroup_cancel_charge(page, memcg, false);
        put_page(page);
@@ -1353,7 +1354,7 @@ pgoff_t page_cache_next_hole(struct address_space *mapping,
        for (i = 0; i < max_scan; i++) {
                struct page *page;
 
-               page = radix_tree_lookup(&mapping->page_tree, index);
+               page = radix_tree_lookup(&mapping->i_pages, index);
                if (!page || radix_tree_exceptional_entry(page))
                        break;
                index++;
@@ -1394,7 +1395,7 @@ pgoff_t page_cache_prev_hole(struct address_space *mapping,
        for (i = 0; i < max_scan; i++) {
                struct page *page;
 
-               page = radix_tree_lookup(&mapping->page_tree, index);
+               page = radix_tree_lookup(&mapping->i_pages, index);
                if (!page || radix_tree_exceptional_entry(page))
                        break;
                index--;
@@ -1427,7 +1428,7 @@ struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
        rcu_read_lock();
 repeat:
        page = NULL;
-       pagep = radix_tree_lookup_slot(&mapping->page_tree, offset);
+       pagep = radix_tree_lookup_slot(&mapping->i_pages, offset);
        if (pagep) {
                page = radix_tree_deref_slot(pagep);
                if (unlikely(!page))
@@ -1633,7 +1634,7 @@ unsigned find_get_entries(struct address_space *mapping,
                return 0;
 
        rcu_read_lock();
-       radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+       radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
                struct page *head, *page;
 repeat:
                page = radix_tree_deref_slot(slot);
@@ -1710,7 +1711,7 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
                return 0;
 
        rcu_read_lock();
-       radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, *start) {
+       radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, *start) {
                struct page *head, *page;
 
                if (iter.index > end)
@@ -1795,7 +1796,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
                return 0;
 
        rcu_read_lock();
-       radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
+       radix_tree_for_each_contig(slot, &mapping->i_pages, &iter, index) {
                struct page *head, *page;
 repeat:
                page = radix_tree_deref_slot(slot);
@@ -1875,8 +1876,7 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
                return 0;
 
        rcu_read_lock();
-       radix_tree_for_each_tagged(slot, &mapping->page_tree,
-                                  &iter, *index, tag) {
+       radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, *index, tag) {
                struct page *head, *page;
 
                if (iter.index > end)
@@ -1969,8 +1969,7 @@ unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
                return 0;
 
        rcu_read_lock();
-       radix_tree_for_each_tagged(slot, &mapping->page_tree,
-                                  &iter, start, tag) {
+       radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, start, tag) {
                struct page *head, *page;
 repeat:
                page = radix_tree_deref_slot(slot);
@@ -2624,8 +2623,7 @@ void filemap_map_pages(struct vm_fault *vmf,
        struct page *head, *page;
 
        rcu_read_lock();
-       radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
-                       start_pgoff) {
+       radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start_pgoff) {
                if (iter.index > end_pgoff)
                        break;
 repeat:
index 320545b98ff55997029476f32e09dbf5d4f5f009..486dc394a5a3cd1fe226e215717631619c8a4195 100644 (file)
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -160,6 +160,32 @@ static void hmm_invalidate_range(struct hmm *hmm,
        up_read(&hmm->mirrors_sem);
 }
 
+static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+       struct hmm_mirror *mirror;
+       struct hmm *hmm = mm->hmm;
+
+       down_write(&hmm->mirrors_sem);
+       mirror = list_first_entry_or_null(&hmm->mirrors, struct hmm_mirror,
+                                         list);
+       while (mirror) {
+               list_del_init(&mirror->list);
+               if (mirror->ops->release) {
+                       /*
+                        * Drop mirrors_sem so callback can wait on any pending
+                        * work that might itself trigger mmu_notifier callback
+                        * and thus would deadlock with us.
+                        */
+                       up_write(&hmm->mirrors_sem);
+                       mirror->ops->release(mirror);
+                       down_write(&hmm->mirrors_sem);
+               }
+               mirror = list_first_entry_or_null(&hmm->mirrors,
+                                                 struct hmm_mirror, list);
+       }
+       up_write(&hmm->mirrors_sem);
+}
+
 static void hmm_invalidate_range_start(struct mmu_notifier *mn,
                                       struct mm_struct *mm,
                                       unsigned long start,
@@ -185,6 +211,7 @@ static void hmm_invalidate_range_end(struct mmu_notifier *mn,
 }
 
 static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
+       .release                = hmm_release,
        .invalidate_range_start = hmm_invalidate_range_start,
        .invalidate_range_end   = hmm_invalidate_range_end,
 };
@@ -206,13 +233,24 @@ int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm)
        if (!mm || !mirror || !mirror->ops)
                return -EINVAL;
 
+again:
        mirror->hmm = hmm_register(mm);
        if (!mirror->hmm)
                return -ENOMEM;
 
        down_write(&mirror->hmm->mirrors_sem);
-       list_add(&mirror->list, &mirror->hmm->mirrors);
-       up_write(&mirror->hmm->mirrors_sem);
+       if (mirror->hmm->mm == NULL) {
+               /*
+                * A racing hmm_mirror_unregister() is about to destroy the hmm
+                * struct. Try again to allocate a new one.
+                */
+               up_write(&mirror->hmm->mirrors_sem);
+               mirror->hmm = NULL;
+               goto again;
+       } else {
+               list_add(&mirror->list, &mirror->hmm->mirrors);
+               up_write(&mirror->hmm->mirrors_sem);
+       }
 
        return 0;
 }
@@ -227,11 +265,32 @@ EXPORT_SYMBOL(hmm_mirror_register);
  */
 void hmm_mirror_unregister(struct hmm_mirror *mirror)
 {
-       struct hmm *hmm = mirror->hmm;
+       bool should_unregister = false;
+       struct mm_struct *mm;
+       struct hmm *hmm;
 
+       if (mirror->hmm == NULL)
+               return;
+
+       hmm = mirror->hmm;
        down_write(&hmm->mirrors_sem);
-       list_del(&mirror->list);
+       list_del_init(&mirror->list);
+       should_unregister = list_empty(&hmm->mirrors);
+       mirror->hmm = NULL;
+       mm = hmm->mm;
+       hmm->mm = NULL;
        up_write(&hmm->mirrors_sem);
+
+       if (!should_unregister || mm == NULL)
+               return;
+
+       spin_lock(&mm->page_table_lock);
+       if (mm->hmm == hmm)
+               mm->hmm = NULL;
+       spin_unlock(&mm->page_table_lock);
+
+       mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
+       kfree(hmm);
 }
 EXPORT_SYMBOL(hmm_mirror_unregister);
 
@@ -240,110 +299,275 @@ struct hmm_vma_walk {
        unsigned long           last;
        bool                    fault;
        bool                    block;
-       bool                    write;
 };
 
-static int hmm_vma_do_fault(struct mm_walk *walk,
-                           unsigned long addr,
-                           hmm_pfn_t *pfn)
+static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr,
+                           bool write_fault, uint64_t *pfn)
 {
        unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_REMOTE;
        struct hmm_vma_walk *hmm_vma_walk = walk->private;
+       struct hmm_range *range = hmm_vma_walk->range;
        struct vm_area_struct *vma = walk->vma;
        int r;
 
        flags |= hmm_vma_walk->block ? 0 : FAULT_FLAG_ALLOW_RETRY;
-       flags |= hmm_vma_walk->write ? FAULT_FLAG_WRITE : 0;
+       flags |= write_fault ? FAULT_FLAG_WRITE : 0;
        r = handle_mm_fault(vma, addr, flags);
        if (r & VM_FAULT_RETRY)
                return -EBUSY;
        if (r & VM_FAULT_ERROR) {
-               *pfn = HMM_PFN_ERROR;
+               *pfn = range->values[HMM_PFN_ERROR];
                return -EFAULT;
        }
 
        return -EAGAIN;
 }
 
-static void hmm_pfns_special(hmm_pfn_t *pfns,
-                            unsigned long addr,
-                            unsigned long end)
-{
-       for (; addr < end; addr += PAGE_SIZE, pfns++)
-               *pfns = HMM_PFN_SPECIAL;
-}
-
 static int hmm_pfns_bad(unsigned long addr,
                        unsigned long end,
                        struct mm_walk *walk)
 {
-       struct hmm_range *range = walk->private;
-       hmm_pfn_t *pfns = range->pfns;
+       struct hmm_vma_walk *hmm_vma_walk = walk->private;
+       struct hmm_range *range = hmm_vma_walk->range;
+       uint64_t *pfns = range->pfns;
        unsigned long i;
 
        i = (addr - range->start) >> PAGE_SHIFT;
        for (; addr < end; addr += PAGE_SIZE, i++)
-               pfns[i] = HMM_PFN_ERROR;
+               pfns[i] = range->values[HMM_PFN_ERROR];
 
        return 0;
 }
 
-static void hmm_pfns_clear(hmm_pfn_t *pfns,
-                          unsigned long addr,
-                          unsigned long end)
-{
-       for (; addr < end; addr += PAGE_SIZE, pfns++)
-               *pfns = 0;
-}
-
-static int hmm_vma_walk_hole(unsigned long addr,
-                            unsigned long end,
-                            struct mm_walk *walk)
+/*
+ * hmm_vma_walk_hole() - handle a range lacking valid pmd or pte(s)
+ * @start: range virtual start address (inclusive)
+ * @end: range virtual end address (exclusive)
+ * @fault: should we fault or not ?
+ * @write_fault: write fault ?
+ * @walk: mm_walk structure
+ * Returns: 0 on success, -EAGAIN after page fault, or page fault error
+ *
+ * This function will be called whenever pmd_none() or pte_none() returns true,
+ * or whenever there is no page directory covering the virtual address range.
+ */
+static int hmm_vma_walk_hole_(unsigned long addr, unsigned long end,
+                             bool fault, bool write_fault,
+                             struct mm_walk *walk)
 {
        struct hmm_vma_walk *hmm_vma_walk = walk->private;
        struct hmm_range *range = hmm_vma_walk->range;
-       hmm_pfn_t *pfns = range->pfns;
+       uint64_t *pfns = range->pfns;
        unsigned long i;
 
        hmm_vma_walk->last = addr;
        i = (addr - range->start) >> PAGE_SHIFT;
        for (; addr < end; addr += PAGE_SIZE, i++) {
-               pfns[i] = HMM_PFN_EMPTY;
-               if (hmm_vma_walk->fault) {
+               pfns[i] = range->values[HMM_PFN_NONE];
+               if (fault || write_fault) {
                        int ret;
 
-                       ret = hmm_vma_do_fault(walk, addr, &pfns[i]);
+                       ret = hmm_vma_do_fault(walk, addr, write_fault,
+                                              &pfns[i]);
                        if (ret != -EAGAIN)
                                return ret;
                }
        }
 
-       return hmm_vma_walk->fault ? -EAGAIN : 0;
+       return (fault || write_fault) ? -EAGAIN : 0;
 }
 
-static int hmm_vma_walk_clear(unsigned long addr,
-                             unsigned long end,
-                             struct mm_walk *walk)
+static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
+                                     uint64_t pfns, uint64_t cpu_flags,
+                                     bool *fault, bool *write_fault)
 {
-       struct hmm_vma_walk *hmm_vma_walk = walk->private;
        struct hmm_range *range = hmm_vma_walk->range;
-       hmm_pfn_t *pfns = range->pfns;
+
+       *fault = *write_fault = false;
+       if (!hmm_vma_walk->fault)
+               return;
+
+       /* We aren't asked to do anything ... */
+       if (!(pfns & range->flags[HMM_PFN_VALID]))
+               return;
+       /* If this is device memory then only fault if explicitly requested */
+       if ((cpu_flags & range->flags[HMM_PFN_DEVICE_PRIVATE])) {
+               /* Do we fault on device memory ? */
+               if (pfns & range->flags[HMM_PFN_DEVICE_PRIVATE]) {
+                       *write_fault = pfns & range->flags[HMM_PFN_WRITE];
+                       *fault = true;
+               }
+               return;
+       }
+
+       /* If CPU page table is not valid then we need to fault */
+       *fault = !(cpu_flags & range->flags[HMM_PFN_VALID]);
+       /* Need to write fault ? */
+       if ((pfns & range->flags[HMM_PFN_WRITE]) &&
+           !(cpu_flags & range->flags[HMM_PFN_WRITE])) {
+               *write_fault = true;
+               *fault = true;
+       }
+}
+
+static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
+                                const uint64_t *pfns, unsigned long npages,
+                                uint64_t cpu_flags, bool *fault,
+                                bool *write_fault)
+{
        unsigned long i;
 
-       hmm_vma_walk->last = addr;
+       if (!hmm_vma_walk->fault) {
+               *fault = *write_fault = false;
+               return;
+       }
+
+       for (i = 0; i < npages; ++i) {
+               hmm_pte_need_fault(hmm_vma_walk, pfns[i], cpu_flags,
+                                  fault, write_fault);
+               if ((*fault) || (*write_fault))
+                       return;
+       }
+}
+
+static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
+                            struct mm_walk *walk)
+{
+       struct hmm_vma_walk *hmm_vma_walk = walk->private;
+       struct hmm_range *range = hmm_vma_walk->range;
+       bool fault, write_fault;
+       unsigned long i, npages;
+       uint64_t *pfns;
+
        i = (addr - range->start) >> PAGE_SHIFT;
-       for (; addr < end; addr += PAGE_SIZE, i++) {
-               pfns[i] = 0;
-               if (hmm_vma_walk->fault) {
-                       int ret;
+       npages = (end - addr) >> PAGE_SHIFT;
+       pfns = &range->pfns[i];
+       hmm_range_need_fault(hmm_vma_walk, pfns, npages,
+                            0, &fault, &write_fault);
+       return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
+}
 
-                       ret = hmm_vma_do_fault(walk, addr, &pfns[i]);
-                       if (ret != -EAGAIN)
-                               return ret;
+static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd)
+{
+       if (pmd_protnone(pmd))
+               return 0;
+       return pmd_write(pmd) ? range->flags[HMM_PFN_VALID] |
+                               range->flags[HMM_PFN_WRITE] :
+                               range->flags[HMM_PFN_VALID];
+}
+
+static int hmm_vma_handle_pmd(struct mm_walk *walk,
+                             unsigned long addr,
+                             unsigned long end,
+                             uint64_t *pfns,
+                             pmd_t pmd)
+{
+       struct hmm_vma_walk *hmm_vma_walk = walk->private;
+       struct hmm_range *range = hmm_vma_walk->range;
+       unsigned long pfn, npages, i;
+       bool fault, write_fault;
+       uint64_t cpu_flags;
+
+       npages = (end - addr) >> PAGE_SHIFT;
+       cpu_flags = pmd_to_hmm_pfn_flags(range, pmd);
+       hmm_range_need_fault(hmm_vma_walk, pfns, npages, cpu_flags,
+                            &fault, &write_fault);
+
+       if (pmd_protnone(pmd) || fault || write_fault)
+               return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
+
+       pfn = pmd_pfn(pmd) + pte_index(addr);
+       for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++)
+               pfns[i] = hmm_pfn_from_pfn(range, pfn) | cpu_flags;
+       hmm_vma_walk->last = end;
+       return 0;
+}
+
+static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte)
+{
+       if (pte_none(pte) || !pte_present(pte))
+               return 0;
+       return pte_write(pte) ? range->flags[HMM_PFN_VALID] |
+                               range->flags[HMM_PFN_WRITE] :
+                               range->flags[HMM_PFN_VALID];
+}
+
+static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
+                             unsigned long end, pmd_t *pmdp, pte_t *ptep,
+                             uint64_t *pfn)
+{
+       struct hmm_vma_walk *hmm_vma_walk = walk->private;
+       struct hmm_range *range = hmm_vma_walk->range;
+       struct vm_area_struct *vma = walk->vma;
+       bool fault, write_fault;
+       uint64_t cpu_flags;
+       pte_t pte = *ptep;
+       uint64_t orig_pfn = *pfn;
+
+       *pfn = range->values[HMM_PFN_NONE];
+       cpu_flags = pte_to_hmm_pfn_flags(range, pte);
+       hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
+                          &fault, &write_fault);
+
+       if (pte_none(pte)) {
+               if (fault || write_fault)
+                       goto fault;
+               return 0;
+       }
+
+       if (!pte_present(pte)) {
+               swp_entry_t entry = pte_to_swp_entry(pte);
+
+               if (!non_swap_entry(entry)) {
+                       if (fault || write_fault)
+                               goto fault;
+                       return 0;
                }
+
+               /*
+                * This is a special swap entry, ignore migration, use
+                * device and report anything else as error.
+                */
+               if (is_device_private_entry(entry)) {
+                       cpu_flags = range->flags[HMM_PFN_VALID] |
+                               range->flags[HMM_PFN_DEVICE_PRIVATE];
+                       cpu_flags |= is_write_device_private_entry(entry) ?
+                               range->flags[HMM_PFN_WRITE] : 0;
+                       hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
+                                          &fault, &write_fault);
+                       if (fault || write_fault)
+                               goto fault;
+                       *pfn = hmm_pfn_from_pfn(range, swp_offset(entry));
+                       *pfn |= cpu_flags;
+                       return 0;
+               }
+
+               if (is_migration_entry(entry)) {
+                       if (fault || write_fault) {
+                               pte_unmap(ptep);
+                               hmm_vma_walk->last = addr;
+                               migration_entry_wait(vma->vm_mm,
+                                                    pmdp, addr);
+                               return -EAGAIN;
+                       }
+                       return 0;
+               }
+
+               /* Report error for everything else */
+               *pfn = range->values[HMM_PFN_ERROR];
+               return -EFAULT;
        }
 
-       return hmm_vma_walk->fault ? -EAGAIN : 0;
+       if (fault || write_fault)
+               goto fault;
+
+       *pfn = hmm_pfn_from_pfn(range, pte_pfn(pte)) | cpu_flags;
+       return 0;
+
+fault:
+       pte_unmap(ptep);
+       /* Fault any virtual address we were asked to fault */
+       return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
 }
 
 static int hmm_vma_walk_pmd(pmd_t *pmdp,
@@ -353,26 +577,20 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 {
        struct hmm_vma_walk *hmm_vma_walk = walk->private;
        struct hmm_range *range = hmm_vma_walk->range;
-       struct vm_area_struct *vma = walk->vma;
-       hmm_pfn_t *pfns = range->pfns;
+       uint64_t *pfns = range->pfns;
        unsigned long addr = start, i;
-       bool write_fault;
-       hmm_pfn_t flag;
        pte_t *ptep;
 
        i = (addr - range->start) >> PAGE_SHIFT;
-       flag = vma->vm_flags & VM_READ ? HMM_PFN_READ : 0;
-       write_fault = hmm_vma_walk->fault & hmm_vma_walk->write;
 
 again:
        if (pmd_none(*pmdp))
                return hmm_vma_walk_hole(start, end, walk);
 
-       if (pmd_huge(*pmdp) && vma->vm_flags & VM_HUGETLB)
+       if (pmd_huge(*pmdp) && (range->vma->vm_flags & VM_HUGETLB))
                return hmm_pfns_bad(start, end, walk);
 
        if (pmd_devmap(*pmdp) || pmd_trans_huge(*pmdp)) {
-               unsigned long pfn;
                pmd_t pmd;
 
                /*
@@ -388,17 +606,8 @@ again:
                barrier();
                if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
                        goto again;
-               if (pmd_protnone(pmd))
-                       return hmm_vma_walk_clear(start, end, walk);
 
-               if (write_fault && !pmd_write(pmd))
-                       return hmm_vma_walk_clear(start, end, walk);
-
-               pfn = pmd_pfn(pmd) + pte_index(addr);
-               flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0;
-               for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
-                       pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag;
-               return 0;
+               return hmm_vma_handle_pmd(walk, addr, end, &pfns[i], pmd);
        }
 
        if (pmd_bad(*pmdp))
@@ -406,79 +615,43 @@ again:
 
        ptep = pte_offset_map(pmdp, addr);
        for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
-               pte_t pte = *ptep;
-
-               pfns[i] = 0;
+               int r;
 
-               if (pte_none(pte)) {
-                       pfns[i] = HMM_PFN_EMPTY;
-                       if (hmm_vma_walk->fault)
-                               goto fault;
-                       continue;
+               r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]);
+               if (r) {
+                       /* hmm_vma_handle_pte() did unmap pte directory */
+                       hmm_vma_walk->last = addr;
+                       return r;
                }
-
-               if (!pte_present(pte)) {
-                       swp_entry_t entry = pte_to_swp_entry(pte);
-
-                       if (!non_swap_entry(entry)) {
-                               if (hmm_vma_walk->fault)
-                                       goto fault;
-                               continue;
-                       }
-
-                       /*
-                        * This is a special swap entry, ignore migration, use
-                        * device and report anything else as error.
-                        */
-                       if (is_device_private_entry(entry)) {
-                               pfns[i] = hmm_pfn_t_from_pfn(swp_offset(entry));
-                               if (is_write_device_private_entry(entry)) {
-                                       pfns[i] |= HMM_PFN_WRITE;
-                               } else if (write_fault)
-                                       goto fault;
-                               pfns[i] |= HMM_PFN_DEVICE_UNADDRESSABLE;
-                               pfns[i] |= flag;
-                       } else if (is_migration_entry(entry)) {
-                               if (hmm_vma_walk->fault) {
-                                       pte_unmap(ptep);
-                                       hmm_vma_walk->last = addr;
-                                       migration_entry_wait(vma->vm_mm,
-                                                            pmdp, addr);
-                                       return -EAGAIN;
-                               }
-                               continue;
-                       } else {
-                               /* Report error for everything else */
-                               pfns[i] = HMM_PFN_ERROR;
-                       }
-                       continue;
-               }
-
-               if (write_fault && !pte_write(pte))
-                       goto fault;
-
-               pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag;
-               pfns[i] |= pte_write(pte) ? HMM_PFN_WRITE : 0;
-               continue;
-
-fault:
-               pte_unmap(ptep);
-               /* Fault all pages in range */
-               return hmm_vma_walk_clear(start, end, walk);
        }
        pte_unmap(ptep - 1);
 
+       hmm_vma_walk->last = addr;
        return 0;
 }
 
+static void hmm_pfns_clear(struct hmm_range *range,
+                          uint64_t *pfns,
+                          unsigned long addr,
+                          unsigned long end)
+{
+       for (; addr < end; addr += PAGE_SIZE, pfns++)
+               *pfns = range->values[HMM_PFN_NONE];
+}
+
+static void hmm_pfns_special(struct hmm_range *range)
+{
+       unsigned long addr = range->start, i = 0;
+
+       for (; addr < range->end; addr += PAGE_SIZE, i++)
+               range->pfns[i] = range->values[HMM_PFN_SPECIAL];
+}
+
 /*
  * hmm_vma_get_pfns() - snapshot CPU page table for a range of virtual addresses
- * @vma: virtual memory area containing the virtual address range
- * @range: used to track snapshot validity
- * @start: range virtual start address (inclusive)
- * @end: range virtual end address (exclusive)
- * @entries: array of hmm_pfn_t: provided by the caller, filled in by function
- * Returns: -EINVAL if invalid argument, -ENOMEM out of memory, 0 success
+ * @range: range being snapshotted
+ * Returns: -EINVAL if invalid argument, -ENOMEM out of memory, -EPERM invalid
+ *          vma permission, 0 success
  *
  * This snapshots the CPU page table for a range of virtual addresses. Snapshot
  * validity is tracked by range struct. See hmm_vma_range_done() for further
@@ -491,26 +664,17 @@ fault:
  * NOT CALLING hmm_vma_range_done() IF FUNCTION RETURNS 0 WILL LEAD TO SERIOUS
  * MEMORY CORRUPTION ! YOU HAVE BEEN WARNED !
  */
-int hmm_vma_get_pfns(struct vm_area_struct *vma,
-                    struct hmm_range *range,
-                    unsigned long start,
-                    unsigned long end,
-                    hmm_pfn_t *pfns)
+int hmm_vma_get_pfns(struct hmm_range *range)
 {
+       struct vm_area_struct *vma = range->vma;
        struct hmm_vma_walk hmm_vma_walk;
        struct mm_walk mm_walk;
        struct hmm *hmm;
 
-       /* FIXME support hugetlb fs */
-       if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) {
-               hmm_pfns_special(pfns, start, end);
-               return -EINVAL;
-       }
-
        /* Sanity check, this really should not happen ! */
-       if (start < vma->vm_start || start >= vma->vm_end)
+       if (range->start < vma->vm_start || range->start >= vma->vm_end)
                return -EINVAL;
-       if (end < vma->vm_start || end > vma->vm_end)
+       if (range->end < vma->vm_start || range->end > vma->vm_end)
                return -EINVAL;
 
        hmm = hmm_register(vma->vm_mm);
@@ -520,10 +684,24 @@ int hmm_vma_get_pfns(struct vm_area_struct *vma,
        if (!hmm->mmu_notifier.ops)
                return -EINVAL;
 
+       /* FIXME support hugetlb fs */
+       if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) {
+               hmm_pfns_special(range);
+               return -EINVAL;
+       }
+
+       if (!(vma->vm_flags & VM_READ)) {
+               /*
+                * If vma do not allow read access, then assume that it does
+                * not allow write access, either. Architecture that allow
+                * write without read access are not supported by HMM, because
+                * operations such has atomic access would not work.
+                */
+               hmm_pfns_clear(range, range->pfns, range->start, range->end);
+               return -EPERM;
+       }
+
        /* Initialize range to track CPU page table update */
-       range->start = start;
-       range->pfns = pfns;
-       range->end = end;
        spin_lock(&hmm->lock);
        range->valid = true;
        list_add_rcu(&range->list, &hmm->ranges);
@@ -541,14 +719,13 @@ int hmm_vma_get_pfns(struct vm_area_struct *vma,
        mm_walk.pmd_entry = hmm_vma_walk_pmd;
        mm_walk.pte_hole = hmm_vma_walk_hole;
 
-       walk_page_range(start, end, &mm_walk);
+       walk_page_range(range->start, range->end, &mm_walk);
        return 0;
 }
 EXPORT_SYMBOL(hmm_vma_get_pfns);
 
 /*
  * hmm_vma_range_done() - stop tracking change to CPU page table over a range
- * @vma: virtual memory area containing the virtual address range
  * @range: range being tracked
  * Returns: false if range data has been invalidated, true otherwise
  *
@@ -568,10 +745,10 @@ EXPORT_SYMBOL(hmm_vma_get_pfns);
  *
  * There are two ways to use this :
  * again:
- *   hmm_vma_get_pfns(vma, range, start, end, pfns); or hmm_vma_fault(...);
+ *   hmm_vma_get_pfns(range); or hmm_vma_fault(...);
  *   trans = device_build_page_table_update_transaction(pfns);
  *   device_page_table_lock();
- *   if (!hmm_vma_range_done(vma, range)) {
+ *   if (!hmm_vma_range_done(range)) {
  *     device_page_table_unlock();
  *     goto again;
  *   }
@@ -579,13 +756,13 @@ EXPORT_SYMBOL(hmm_vma_get_pfns);
  *   device_page_table_unlock();
  *
  * Or:
- *   hmm_vma_get_pfns(vma, range, start, end, pfns); or hmm_vma_fault(...);
+ *   hmm_vma_get_pfns(range); or hmm_vma_fault(...);
  *   device_page_table_lock();
- *   hmm_vma_range_done(vma, range);
- *   device_update_page_table(pfns);
+ *   hmm_vma_range_done(range);
+ *   device_update_page_table(range->pfns);
  *   device_page_table_unlock();
  */
-bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range)
+bool hmm_vma_range_done(struct hmm_range *range)
 {
        unsigned long npages = (range->end - range->start) >> PAGE_SHIFT;
        struct hmm *hmm;
@@ -595,7 +772,7 @@ bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range)
                return false;
        }
 
-       hmm = hmm_register(vma->vm_mm);
+       hmm = hmm_register(range->vma->vm_mm);
        if (!hmm) {
                memset(range->pfns, 0, sizeof(*range->pfns) * npages);
                return false;
@@ -611,36 +788,34 @@ EXPORT_SYMBOL(hmm_vma_range_done);
 
 /*
  * hmm_vma_fault() - try to fault some address in a virtual address range
- * @vma: virtual memory area containing the virtual address range
- * @range: use to track pfns array content validity
- * @start: fault range virtual start address (inclusive)
- * @end: fault range virtual end address (exclusive)
- * @pfns: array of hmm_pfn_t, only entry with fault flag set will be faulted
- * @write: is it a write fault
+ * @range: range being faulted
  * @block: allow blocking on fault (if true it sleeps and do not drop mmap_sem)
  * Returns: 0 success, error otherwise (-EAGAIN means mmap_sem have been drop)
  *
  * This is similar to a regular CPU page fault except that it will not trigger
  * any memory migration if the memory being faulted is not accessible by CPUs.
  *
- * On error, for one virtual address in the range, the function will set the
- * hmm_pfn_t error flag for the corresponding pfn entry.
+ * On error, for one virtual address in the range, the function will mark the
+ * corresponding HMM pfn entry with an error flag.
  *
  * Expected use pattern:
  * retry:
  *   down_read(&mm->mmap_sem);
  *   // Find vma and address device wants to fault, initialize hmm_pfn_t
  *   // array accordingly
- *   ret = hmm_vma_fault(vma, start, end, pfns, allow_retry);
+ *   ret = hmm_vma_fault(range, write, block);
  *   switch (ret) {
  *   case -EAGAIN:
- *     hmm_vma_range_done(vma, range);
+ *     hmm_vma_range_done(range);
  *     // You might want to rate limit or yield to play nicely, you may
  *     // also commit any valid pfn in the array assuming that you are
  *     // getting true from hmm_vma_range_monitor_end()
  *     goto retry;
  *   case 0:
  *     break;
+ *   case -ENOMEM:
+ *   case -EINVAL:
+ *   case -EPERM:
  *   default:
  *     // Handle error !
  *     up_read(&mm->mmap_sem)
@@ -648,7 +823,7 @@ EXPORT_SYMBOL(hmm_vma_range_done);
  *   }
  *   // Take device driver lock that serialize device page table update
  *   driver_lock_device_page_table_update();
- *   hmm_vma_range_done(vma, range);
+ *   hmm_vma_range_done(range);
  *   // Commit pfns we got from hmm_vma_fault()
  *   driver_unlock_device_page_table_update();
  *   up_read(&mm->mmap_sem)
@@ -658,51 +833,54 @@ EXPORT_SYMBOL(hmm_vma_range_done);
  *
  * YOU HAVE BEEN WARNED !
  */
-int hmm_vma_fault(struct vm_area_struct *vma,
-                 struct hmm_range *range,
-                 unsigned long start,
-                 unsigned long end,
-                 hmm_pfn_t *pfns,
-                 bool write,
-                 bool block)
+int hmm_vma_fault(struct hmm_range *range, bool block)
 {
+       struct vm_area_struct *vma = range->vma;
+       unsigned long start = range->start;
        struct hmm_vma_walk hmm_vma_walk;
        struct mm_walk mm_walk;
        struct hmm *hmm;
        int ret;
 
        /* Sanity check, this really should not happen ! */
-       if (start < vma->vm_start || start >= vma->vm_end)
+       if (range->start < vma->vm_start || range->start >= vma->vm_end)
                return -EINVAL;
-       if (end < vma->vm_start || end > vma->vm_end)
+       if (range->end < vma->vm_start || range->end > vma->vm_end)
                return -EINVAL;
 
        hmm = hmm_register(vma->vm_mm);
        if (!hmm) {
-               hmm_pfns_clear(pfns, start, end);
+               hmm_pfns_clear(range, range->pfns, range->start, range->end);
                return -ENOMEM;
        }
        /* Caller must have registered a mirror using hmm_mirror_register() */
        if (!hmm->mmu_notifier.ops)
                return -EINVAL;
 
+       /* FIXME support hugetlb fs */
+       if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) {
+               hmm_pfns_special(range);
+               return -EINVAL;
+       }
+
+       if (!(vma->vm_flags & VM_READ)) {
+               /*
+                * If vma do not allow read access, then assume that it does
+                * not allow write access, either. Architecture that allow
+                * write without read access are not supported by HMM, because
+                * operations such has atomic access would not work.
+                */
+               hmm_pfns_clear(range, range->pfns, range->start, range->end);
+               return -EPERM;
+       }
+
        /* Initialize range to track CPU page table update */
-       range->start = start;
-       range->pfns = pfns;
-       range->end = end;
        spin_lock(&hmm->lock);
        range->valid = true;
        list_add_rcu(&range->list, &hmm->ranges);
        spin_unlock(&hmm->lock);
 
-       /* FIXME support hugetlb fs */
-       if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) {
-               hmm_pfns_special(pfns, start, end);
-               return 0;
-       }
-
        hmm_vma_walk.fault = true;
-       hmm_vma_walk.write = write;
        hmm_vma_walk.block = block;
        hmm_vma_walk.range = range;
        mm_walk.private = &hmm_vma_walk;
@@ -717,7 +895,7 @@ int hmm_vma_fault(struct vm_area_struct *vma,
        mm_walk.pte_hole = hmm_vma_walk_hole;
 
        do {
-               ret = walk_page_range(start, end, &mm_walk);
+               ret = walk_page_range(start, range->end, &mm_walk);
                start = hmm_vma_walk.last;
        } while (ret == -EAGAIN);
 
@@ -725,8 +903,9 @@ int hmm_vma_fault(struct vm_area_struct *vma,
                unsigned long i;
 
                i = (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
-               hmm_pfns_clear(&pfns[i], hmm_vma_walk.last, end);
-               hmm_vma_range_done(vma, range);
+               hmm_pfns_clear(range, &range->pfns[i], hmm_vma_walk.last,
+                              range->end);
+               hmm_vma_range_done(range);
        }
        return ret;
 }
@@ -845,13 +1024,6 @@ static void hmm_devmem_release(struct device *dev, void *data)
        hmm_devmem_radix_release(resource);
 }
 
-static struct hmm_devmem *hmm_devmem_find(resource_size_t phys)
-{
-       WARN_ON_ONCE(!rcu_read_lock_held());
-
-       return radix_tree_lookup(&hmm_devmem_radix, phys >> PA_SECTION_SHIFT);
-}
-
 static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
 {
        resource_size_t key, align_start, align_size, align_end;
@@ -892,9 +1064,8 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
        for (key = align_start; key <= align_end; key += PA_SECTION_SIZE) {
                struct hmm_devmem *dup;
 
-               rcu_read_lock();
-               dup = hmm_devmem_find(key);
-               rcu_read_unlock();
+               dup = radix_tree_lookup(&hmm_devmem_radix,
+                                       key >> PA_SECTION_SHIFT);
                if (dup) {
                        dev_err(device, "%s: collides with mapping for %s\n",
                                __func__, dev_name(dup->device));
index f0ae8d1d4329d5c72c9bcf9e460e16a7815a1059..14ed6ee5e02fc8bc6acc767de9e42ed464ce5675 100644 (file)
@@ -555,8 +555,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
        VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-       if (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg,
-                                 true)) {
+       if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
                put_page(page);
                count_vm_event(THP_FAULT_FALLBACK);
                return VM_FAULT_FALLBACK;
@@ -1317,7 +1316,7 @@ alloc:
        }
 
        if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
-                               huge_gfp | __GFP_NORETRY, &memcg, true))) {
+                                       huge_gfp, &memcg, true))) {
                put_page(new_page);
                split_huge_pmd(vma, vmf->pmd, vmf->address);
                if (page)
@@ -2402,6 +2401,12 @@ static void __split_huge_page_tail(struct page *head, int tail,
 
        page_tail->index = head->index + tail;
        page_cpupid_xchg_last(page_tail, page_cpupid_last(head));
+
+       /*
+        * always add to the tail because some iterators expect new
+        * pages to show after the currently processed elements - e.g.
+        * migrate_pages
+        */
        lru_add_page_tail(head, page_tail, lruvec, list);
 }
 
@@ -2445,7 +2450,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        } else {
                /* Additional pin to radix tree */
                page_ref_add(head, 2);
-               spin_unlock(&head->mapping->tree_lock);
+               xa_unlock(&head->mapping->i_pages);
        }
 
        spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
@@ -2653,15 +2658,15 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
        if (mapping) {
                void **pslot;
 
-               spin_lock(&mapping->tree_lock);
-               pslot = radix_tree_lookup_slot(&mapping->page_tree,
+               xa_lock(&mapping->i_pages);
+               pslot = radix_tree_lookup_slot(&mapping->i_pages,
                                page_index(head));
                /*
                 * Check if the head page is present in radix tree.
                 * We assume all tail are present too, if head is there.
                 */
                if (radix_tree_deref_slot_protected(pslot,
-                                       &mapping->tree_lock) != head)
+                                       &mapping->i_pages.xa_lock) != head)
                        goto fail;
        }
 
@@ -2695,7 +2700,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                }
                spin_unlock(&pgdata->split_queue_lock);
 fail:          if (mapping)
-                       spin_unlock(&mapping->tree_lock);
+                       xa_unlock(&mapping->i_pages);
                spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
                unfreeze_page(head);
                ret = -EBUSY;
index e6bd35182daee1226b684464fd202df86762c636..62d8c34e63d54dbf45c2d0d4c3fd215d92933e2c 100644 (file)
@@ -168,6 +168,9 @@ extern void post_alloc_hook(struct page *page, unsigned int order,
                                        gfp_t gfp_flags);
 extern int user_min_free_kbytes;
 
+extern void set_zone_contiguous(struct zone *zone);
+extern void clear_zone_contiguous(struct zone *zone);
+
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 
 /*
@@ -495,7 +498,6 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 #define ALLOC_HARDER           0x10 /* try to alloc harder */
 #define ALLOC_HIGH             0x20 /* __GFP_HIGH set */
 #define ALLOC_CPUSET           0x40 /* check for correct cpuset */
-#define ALLOC_CMA              0x80 /* allow allocations from CMA areas */
 
 enum ttu_flags;
 struct tlbflush_unmap_batch;
@@ -538,4 +540,5 @@ static inline bool is_migrate_highatomic_page(struct page *page)
 }
 
 void setup_zone_pageset(struct zone *zone);
+extern struct page *alloc_new_node_page(struct page *page, unsigned long node);
 #endif /* __MM_INTERNAL_H */
index e42568284e06038ab70ec1344f63a2e5182ea90d..d7b2a4bf8671643e3345b7ac197570582ac14aef 100644 (file)
@@ -965,9 +965,7 @@ static void collapse_huge_page(struct mm_struct *mm,
                goto out_nolock;
        }
 
-       /* Do not oom kill for khugepaged charges */
-       if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
-                                          &memcg, true))) {
+       if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
                result = SCAN_CGROUP_CHARGE_FAIL;
                goto out_nolock;
        }
@@ -1326,9 +1324,7 @@ static void collapse_shmem(struct mm_struct *mm,
                goto out;
        }
 
-       /* Do not oom kill for khugepaged charges */
-       if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
-                                          &memcg, true))) {
+       if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
                result = SCAN_CGROUP_CHARGE_FAIL;
                goto out;
        }
@@ -1348,8 +1344,8 @@ static void collapse_shmem(struct mm_struct *mm,
         */
 
        index = start;
-       spin_lock_irq(&mapping->tree_lock);
-       radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+       xa_lock_irq(&mapping->i_pages);
+       radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
                int n = min(iter.index, end) - index;
 
                /*
@@ -1362,7 +1358,7 @@ static void collapse_shmem(struct mm_struct *mm,
                }
                nr_none += n;
                for (; index < min(iter.index, end); index++) {
-                       radix_tree_insert(&mapping->page_tree, index,
+                       radix_tree_insert(&mapping->i_pages, index,
                                        new_page + (index % HPAGE_PMD_NR));
                }
 
@@ -1371,16 +1367,16 @@ static void collapse_shmem(struct mm_struct *mm,
                        break;
 
                page = radix_tree_deref_slot_protected(slot,
-                               &mapping->tree_lock);
+                               &mapping->i_pages.xa_lock);
                if (radix_tree_exceptional_entry(page) || !PageUptodate(page)) {
-                       spin_unlock_irq(&mapping->tree_lock);
+                       xa_unlock_irq(&mapping->i_pages);
                        /* swap in or instantiate fallocated page */
                        if (shmem_getpage(mapping->host, index, &page,
                                                SGP_NOHUGE)) {
                                result = SCAN_FAIL;
                                goto tree_unlocked;
                        }
-                       spin_lock_irq(&mapping->tree_lock);
+                       xa_lock_irq(&mapping->i_pages);
                } else if (trylock_page(page)) {
                        get_page(page);
                } else {
@@ -1389,7 +1385,7 @@ static void collapse_shmem(struct mm_struct *mm,
                }
 
                /*
-                * The page must be locked, so we can drop the tree_lock
+                * The page must be locked, so we can drop the i_pages lock
                 * without racing with truncate.
                 */
                VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -1400,7 +1396,7 @@ static void collapse_shmem(struct mm_struct *mm,
                        result = SCAN_TRUNCATED;
                        goto out_unlock;
                }
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
 
                if (isolate_lru_page(page)) {
                        result = SCAN_DEL_PAGE_LRU;
@@ -1410,11 +1406,11 @@ static void collapse_shmem(struct mm_struct *mm,
                if (page_mapped(page))
                        unmap_mapping_pages(mapping, index, 1, false);
 
-               spin_lock_irq(&mapping->tree_lock);
+               xa_lock_irq(&mapping->i_pages);
 
-               slot = radix_tree_lookup_slot(&mapping->page_tree, index);
+               slot = radix_tree_lookup_slot(&mapping->i_pages, index);
                VM_BUG_ON_PAGE(page != radix_tree_deref_slot_protected(slot,
-                                       &mapping->tree_lock), page);
+                                       &mapping->i_pages.xa_lock), page);
                VM_BUG_ON_PAGE(page_mapped(page), page);
 
                /*
@@ -1435,14 +1431,14 @@ static void collapse_shmem(struct mm_struct *mm,
                list_add_tail(&page->lru, &pagelist);
 
                /* Finally, replace with the new page. */
-               radix_tree_replace_slot(&mapping->page_tree, slot,
+               radix_tree_replace_slot(&mapping->i_pages, slot,
                                new_page + (index % HPAGE_PMD_NR));
 
                slot = radix_tree_iter_resume(slot, &iter);
                index++;
                continue;
 out_lru:
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
                putback_lru_page(page);
 out_isolate_failed:
                unlock_page(page);
@@ -1468,14 +1464,14 @@ out_unlock:
                }
 
                for (; index < end; index++) {
-                       radix_tree_insert(&mapping->page_tree, index,
+                       radix_tree_insert(&mapping->i_pages, index,
                                        new_page + (index % HPAGE_PMD_NR));
                }
                nr_none += n;
        }
 
 tree_locked:
-       spin_unlock_irq(&mapping->tree_lock);
+       xa_unlock_irq(&mapping->i_pages);
 tree_unlocked:
 
        if (result == SCAN_SUCCEED) {
@@ -1524,9 +1520,8 @@ tree_unlocked:
        } else {
                /* Something went wrong: rollback changes to the radix-tree */
                shmem_uncharge(mapping->host, nr_none);
-               spin_lock_irq(&mapping->tree_lock);
-               radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
-                               start) {
+               xa_lock_irq(&mapping->i_pages);
+               radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
                        if (iter.index >= end)
                                break;
                        page = list_first_entry_or_null(&pagelist,
@@ -1536,8 +1531,7 @@ tree_unlocked:
                                        break;
                                nr_none--;
                                /* Put holes back where they were */
-                               radix_tree_delete(&mapping->page_tree,
-                                                 iter.index);
+                               radix_tree_delete(&mapping->i_pages, iter.index);
                                continue;
                        }
 
@@ -1546,16 +1540,15 @@ tree_unlocked:
                        /* Unfreeze the page. */
                        list_del(&page->lru);
                        page_ref_unfreeze(page, 2);
-                       radix_tree_replace_slot(&mapping->page_tree,
-                                               slot, page);
+                       radix_tree_replace_slot(&mapping->i_pages, slot, page);
                        slot = radix_tree_iter_resume(slot, &iter);
-                       spin_unlock_irq(&mapping->tree_lock);
+                       xa_unlock_irq(&mapping->i_pages);
                        putback_lru_page(page);
                        unlock_page(page);
-                       spin_lock_irq(&mapping->tree_lock);
+                       xa_lock_irq(&mapping->i_pages);
                }
                VM_BUG_ON(nr_none);
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
 
                /* Unfreeze new_page, caller would take care about freeing it */
                page_ref_unfreeze(new_page, 1);
@@ -1583,7 +1576,7 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
        swap = 0;
        memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
        rcu_read_lock();
-       radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+       radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
                if (iter.index >= start + HPAGE_PMD_NR)
                        break;
 
@@ -1883,8 +1876,16 @@ static void set_recommended_min_free_kbytes(void)
        int nr_zones = 0;
        unsigned long recommended_min;
 
-       for_each_populated_zone(zone)
+       for_each_populated_zone(zone) {
+               /*
+                * We don't need to worry about fragmentation of
+                * ZONE_MOVABLE since it only has movable pages.
+                */
+               if (zone_idx(zone) > gfp_zone(GFP_USER))
+                       continue;
+
                nr_zones++;
+       }
 
        /* Ensure 2 pageblocks are free to assist fragmentation avoidance */
        recommended_min = pageblock_nr_pages * nr_zones * 2;
index e8d6c6210b80c305da1481c7be54680e6459c866..e3cbf9a92f3cdd9519f7724f152d816cd22f3a44 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1131,6 +1131,13 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
        } else {
                newpte = pte_mkspecial(pfn_pte(page_to_pfn(kpage),
                                               vma->vm_page_prot));
+               /*
+                * We're replacing an anonymous page with a zero page, which is
+                * not anonymous. We need to do proper accounting otherwise we
+                * will get wrong values in /proc, and a BUG message in dmesg
+                * when tearing down the mm.
+                */
+               dec_mm_counter(mm, MM_ANONPAGES);
        }
 
        flush_cache_page(vma, addr, pte_pfn(*ptep));
index 9ec024b862aca01cb32d302584e6b110d11107a7..e074f7c637aa4e62d6268d3e765564e6f2a9cde0 100644 (file)
@@ -1485,7 +1485,7 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
 
 static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 {
-       if (!current->memcg_may_oom)
+       if (!current->memcg_may_oom || order > PAGE_ALLOC_COSTLY_ORDER)
                return;
        /*
         * We are in the middle of the charge context here, so we
@@ -1839,7 +1839,7 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
                        }
                }
 
-               for (i = 0; i < MEMCG_NR_EVENTS; i++) {
+               for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
                        long x;
 
                        x = this_cpu_xchg(memcg->stat_cpu->events[i], 0);
@@ -1858,7 +1858,7 @@ static void reclaim_high(struct mem_cgroup *memcg,
        do {
                if (page_counter_read(&memcg->memory) <= memcg->high)
                        continue;
-               mem_cgroup_event(memcg, MEMCG_HIGH);
+               memcg_memory_event(memcg, MEMCG_HIGH);
                try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
        } while ((memcg = parent_mem_cgroup(memcg)));
 }
@@ -1949,7 +1949,7 @@ retry:
        if (!gfpflags_allow_blocking(gfp_mask))
                goto nomem;
 
-       mem_cgroup_event(mem_over_limit, MEMCG_MAX);
+       memcg_memory_event(mem_over_limit, MEMCG_MAX);
 
        nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
                                                    gfp_mask, may_swap);
@@ -1992,7 +1992,7 @@ retry:
        if (fatal_signal_pending(current))
                goto force;
 
-       mem_cgroup_event(mem_over_limit, MEMCG_OOM);
+       memcg_memory_event(mem_over_limit, MEMCG_OOM);
 
        mem_cgroup_oom(mem_over_limit, gfp_mask,
                       get_order(nr_pages * PAGE_SIZE));
@@ -2688,10 +2688,10 @@ static void tree_events(struct mem_cgroup *memcg, unsigned long *events)
        struct mem_cgroup *iter;
        int i;
 
-       memset(events, 0, sizeof(*events) * MEMCG_NR_EVENTS);
+       memset(events, 0, sizeof(*events) * NR_VM_EVENT_ITEMS);
 
        for_each_mem_cgroup_tree(iter, memcg) {
-               for (i = 0; i < MEMCG_NR_EVENTS; i++)
+               for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
                        events[i] += memcg_sum_events(iter, i);
        }
 }
@@ -4108,6 +4108,9 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 {
        struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
 
+       if (!pn)
+               return;
+
        free_percpu(pn->lruvec_stat_cpu);
        kfree(pn);
 }
@@ -5178,7 +5181,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
                        continue;
                }
 
-               mem_cgroup_event(memcg, MEMCG_OOM);
+               memcg_memory_event(memcg, MEMCG_OOM);
                if (!mem_cgroup_out_of_memory(memcg, GFP_KERNEL, 0))
                        break;
        }
@@ -5191,10 +5194,14 @@ static int memory_events_show(struct seq_file *m, void *v)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
 
-       seq_printf(m, "low %lu\n", memcg_sum_events(memcg, MEMCG_LOW));
-       seq_printf(m, "high %lu\n", memcg_sum_events(memcg, MEMCG_HIGH));
-       seq_printf(m, "max %lu\n", memcg_sum_events(memcg, MEMCG_MAX));
-       seq_printf(m, "oom %lu\n", memcg_sum_events(memcg, MEMCG_OOM));
+       seq_printf(m, "low %lu\n",
+                  atomic_long_read(&memcg->memory_events[MEMCG_LOW]));
+       seq_printf(m, "high %lu\n",
+                  atomic_long_read(&memcg->memory_events[MEMCG_HIGH]));
+       seq_printf(m, "max %lu\n",
+                  atomic_long_read(&memcg->memory_events[MEMCG_MAX]));
+       seq_printf(m, "oom %lu\n",
+                  atomic_long_read(&memcg->memory_events[MEMCG_OOM]));
        seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
 
        return 0;
@@ -5204,7 +5211,7 @@ static int memory_stat_show(struct seq_file *m, void *v)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
        unsigned long stat[MEMCG_NR_STAT];
-       unsigned long events[MEMCG_NR_EVENTS];
+       unsigned long events[NR_VM_EVENT_ITEMS];
        int i;
 
        /*
@@ -5967,9 +5974,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
 
        /*
         * Interrupts should be disabled here because the caller holds the
-        * mapping->tree_lock lock which is taken with interrupts-off. It is
+        * i_pages lock which is taken with interrupts-off. It is
         * important here to have the interrupts disabled because it is the
-        * only synchronisation we have for udpating the per-CPU variables.
+        * only synchronisation we have for updating the per-CPU variables.
         */
        VM_BUG_ON(!irqs_disabled());
        mem_cgroup_charge_statistics(memcg, page, PageTransHuge(page),
index 2d4bf647cf013b2c5113192f43dc405ce4e13aa4..9d142b9b86dcd970d46a2b8124f065bd5eb29391 100644 (file)
@@ -1487,7 +1487,7 @@ int unpoison_memory(unsigned long pfn)
 }
 EXPORT_SYMBOL(unpoison_memory);
 
-static struct page *new_page(struct page *p, unsigned long private, int **x)
+static struct page *new_page(struct page *p, unsigned long private)
 {
        int nid = page_to_nid(p);
 
index cc6dfa5832ca6d03a6d43a05efb1b31400c00fe5..f74826cdceea1112e648462393f46f034e4d46c2 100644 (file)
@@ -1329,8 +1329,7 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
        return 0;
 }
 
-static struct page *new_node_page(struct page *page, unsigned long private,
-               int **result)
+static struct page *new_node_page(struct page *page, unsigned long private)
 {
        int nid = page_to_nid(page);
        nodemask_t nmask = node_states[N_MEMORY];
@@ -1373,7 +1372,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                        if (isolate_huge_page(page, &source))
                                move_pages -= 1 << compound_order(head);
                        continue;
-               } else if (thp_migration_supported() && PageTransHuge(page))
+               } else if (PageTransHuge(page))
                        pfn = page_to_pfn(compound_head(page))
                                + hpage_nr_pages(page) - 1;
 
index 01cbb7078d6ca50f3dfca124309e402ba78d7aa3..9ac49ef17b4e1e5128f3db38e0a97bbc067ed1b9 100644 (file)
@@ -446,15 +446,6 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
                __split_huge_pmd(walk->vma, pmd, addr, false, NULL);
                goto out;
        }
-       if (!thp_migration_supported()) {
-               get_page(page);
-               spin_unlock(ptl);
-               lock_page(page);
-               ret = split_huge_page(page);
-               unlock_page(page);
-               put_page(page);
-               goto out;
-       }
        if (!queue_pages_required(page, qp)) {
                ret = 1;
                goto unlock;
@@ -495,7 +486,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 
        if (pmd_trans_unstable(pmd))
                return 0;
-retry:
+
        pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
        for (; addr != end; pte++, addr += PAGE_SIZE) {
                if (!pte_present(*pte))
@@ -511,22 +502,6 @@ retry:
                        continue;
                if (!queue_pages_required(page, qp))
                        continue;
-               if (PageTransCompound(page) && !thp_migration_supported()) {
-                       get_page(page);
-                       pte_unmap_unlock(pte, ptl);
-                       lock_page(page);
-                       ret = split_huge_page(page);
-                       unlock_page(page);
-                       put_page(page);
-                       /* Failed to split -- skip. */
-                       if (ret) {
-                               pte = pte_offset_map_lock(walk->mm, pmd,
-                                               addr, &ptl);
-                               continue;
-                       }
-                       goto retry;
-               }
-
                migrate_page_add(page, qp->pagelist, flags);
        }
        pte_unmap_unlock(pte - 1, ptl);
@@ -942,12 +917,13 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
        }
 }
 
-static struct page *new_node_page(struct page *page, unsigned long node, int **x)
+/* page allocation callback for NUMA node migration */
+struct page *alloc_new_node_page(struct page *page, unsigned long node)
 {
        if (PageHuge(page))
                return alloc_huge_page_node(page_hstate(compound_head(page)),
                                        node);
-       else if (thp_migration_supported() && PageTransHuge(page)) {
+       else if (PageTransHuge(page)) {
                struct page *thp;
 
                thp = alloc_pages_node(node,
@@ -986,7 +962,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
                        flags | MPOL_MF_DISCONTIG_OK, &pagelist);
 
        if (!list_empty(&pagelist)) {
-               err = migrate_pages(&pagelist, new_node_page, NULL, dest,
+               err = migrate_pages(&pagelist, alloc_new_node_page, NULL, dest,
                                        MIGRATE_SYNC, MR_SYSCALL);
                if (err)
                        putback_movable_pages(&pagelist);
@@ -1107,7 +1083,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
  * list of pages handed to migrate_pages()--which is how we get here--
  * is in virtual address order.
  */
-static struct page *new_page(struct page *page, unsigned long start, int **x)
+static struct page *new_page(struct page *page, unsigned long start)
 {
        struct vm_area_struct *vma;
        unsigned long uninitialized_var(address);
@@ -1123,7 +1099,7 @@ static struct page *new_page(struct page *page, unsigned long start, int **x)
        if (PageHuge(page)) {
                return alloc_huge_page_vma(page_hstate(compound_head(page)),
                                vma, address);
-       } else if (thp_migration_supported() && PageTransHuge(page)) {
+       } else if (PageTransHuge(page)) {
                struct page *thp;
 
                thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
@@ -1152,7 +1128,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
        return -ENOSYS;
 }
 
-static struct page *new_page(struct page *page, unsigned long start, int **x)
+static struct page *new_page(struct page *page, unsigned long start)
 {
        return NULL;
 }
index 003886606a2251cab9b1e1e50d128d5d5f1de497..f65dd69e1fd1a7a1dd7c88679ae8aacabe19aacc 100644 (file)
@@ -467,20 +467,21 @@ int migrate_page_move_mapping(struct address_space *mapping,
        oldzone = page_zone(page);
        newzone = page_zone(newpage);
 
-       spin_lock_irq(&mapping->tree_lock);
+       xa_lock_irq(&mapping->i_pages);
 
-       pslot = radix_tree_lookup_slot(&mapping->page_tree,
+       pslot = radix_tree_lookup_slot(&mapping->i_pages,
                                        page_index(page));
 
        expected_count += 1 + page_has_private(page);
        if (page_count(page) != expected_count ||
-               radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
-               spin_unlock_irq(&mapping->tree_lock);
+               radix_tree_deref_slot_protected(pslot,
+                                       &mapping->i_pages.xa_lock) != page) {
+               xa_unlock_irq(&mapping->i_pages);
                return -EAGAIN;
        }
 
        if (!page_ref_freeze(page, expected_count)) {
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
                return -EAGAIN;
        }
 
@@ -494,7 +495,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
        if (mode == MIGRATE_ASYNC && head &&
                        !buffer_migrate_lock_buffers(head, mode)) {
                page_ref_unfreeze(page, expected_count);
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
                return -EAGAIN;
        }
 
@@ -522,7 +523,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
                SetPageDirty(newpage);
        }
 
-       radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
+       radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
 
        /*
         * Drop cache reference from old page by unfreezing
@@ -531,7 +532,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
         */
        page_ref_unfreeze(page, expected_count - 1);
 
-       spin_unlock(&mapping->tree_lock);
+       xa_unlock(&mapping->i_pages);
        /* Leave irq disabled to prevent preemption while updating stats */
 
        /*
@@ -574,20 +575,19 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
        int expected_count;
        void **pslot;
 
-       spin_lock_irq(&mapping->tree_lock);
+       xa_lock_irq(&mapping->i_pages);
 
-       pslot = radix_tree_lookup_slot(&mapping->page_tree,
-                                       page_index(page));
+       pslot = radix_tree_lookup_slot(&mapping->i_pages, page_index(page));
 
        expected_count = 2 + page_has_private(page);
        if (page_count(page) != expected_count ||
-               radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
-               spin_unlock_irq(&mapping->tree_lock);
+               radix_tree_deref_slot_protected(pslot, &mapping->i_pages.xa_lock) != page) {
+               xa_unlock_irq(&mapping->i_pages);
                return -EAGAIN;
        }
 
        if (!page_ref_freeze(page, expected_count)) {
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
                return -EAGAIN;
        }
 
@@ -596,11 +596,11 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 
        get_page(newpage);
 
-       radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
+       radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
 
        page_ref_unfreeze(page, expected_count - 1);
 
-       spin_unlock_irq(&mapping->tree_lock);
+       xa_unlock_irq(&mapping->i_pages);
 
        return MIGRATEPAGE_SUCCESS;
 }
@@ -1137,10 +1137,12 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
                                   enum migrate_reason reason)
 {
        int rc = MIGRATEPAGE_SUCCESS;
-       int *result = NULL;
        struct page *newpage;
 
-       newpage = get_new_page(page, private, &result);
+       if (!thp_migration_supported() && PageTransHuge(page))
+               return -ENOMEM;
+
+       newpage = get_new_page(page, private);
        if (!newpage)
                return -ENOMEM;
 
@@ -1161,14 +1163,6 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
                goto out;
        }
 
-       if (unlikely(PageTransHuge(page) && !PageTransHuge(newpage))) {
-               lock_page(page);
-               rc = split_huge_page(page);
-               unlock_page(page);
-               if (rc)
-                       goto out;
-       }
-
        rc = __unmap_and_move(page, newpage, force, mode);
        if (rc == MIGRATEPAGE_SUCCESS)
                set_page_owner_migrate_reason(newpage, reason);
@@ -1231,12 +1225,6 @@ put_new:
                        put_page(newpage);
        }
 
-       if (result) {
-               if (rc)
-                       *result = rc;
-               else
-                       *result = page_to_nid(newpage);
-       }
        return rc;
 }
 
@@ -1264,7 +1252,6 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
                                enum migrate_mode mode, int reason)
 {
        int rc = -EAGAIN;
-       int *result = NULL;
        int page_was_mapped = 0;
        struct page *new_hpage;
        struct anon_vma *anon_vma = NULL;
@@ -1281,7 +1268,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
                return -ENOSYS;
        }
 
-       new_hpage = get_new_page(hpage, private, &result);
+       new_hpage = get_new_page(hpage, private);
        if (!new_hpage)
                return -ENOMEM;
 
@@ -1345,12 +1332,6 @@ out:
        else
                putback_active_hugepage(new_hpage);
 
-       if (result) {
-               if (rc)
-                       *result = rc;
-               else
-                       *result = page_to_nid(new_hpage);
-       }
        return rc;
 }
 
@@ -1395,6 +1376,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
                retry = 0;
 
                list_for_each_entry_safe(page, page2, from, lru) {
+retry:
                        cond_resched();
 
                        if (PageHuge(page))
@@ -1408,6 +1390,26 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 
                        switch(rc) {
                        case -ENOMEM:
+                               /*
+                                * THP migration might be unsupported or the
+                                * allocation could've failed so we should
+                                * retry on the same page with the THP split
+                                * to base pages.
+                                *
+                                * Head page is retried immediately and tail
+                                * pages are added to the tail of the list so
+                                * we encounter them after the rest of the list
+                                * is processed.
+                                */
+                               if (PageTransHuge(page)) {
+                                       lock_page(page);
+                                       rc = split_huge_page_to_list(page, from);
+                                       unlock_page(page);
+                                       if (!rc) {
+                                               list_safe_reset_next(page, page2, lru);
+                                               goto retry;
+                                       }
+                               }
                                nr_failed++;
                                goto out;
                        case -EAGAIN:
@@ -1444,141 +1446,101 @@ out:
 }
 
 #ifdef CONFIG_NUMA
-/*
- * Move a list of individual pages
- */
-struct page_to_node {
-       unsigned long addr;
-       struct page *page;
-       int node;
-       int status;
-};
 
-static struct page *new_page_node(struct page *p, unsigned long private,
-               int **result)
+static int store_status(int __user *status, int start, int value, int nr)
 {
-       struct page_to_node *pm = (struct page_to_node *)private;
-
-       while (pm->node != MAX_NUMNODES && pm->page != p)
-               pm++;
+       while (nr-- > 0) {
+               if (put_user(value, status + start))
+                       return -EFAULT;
+               start++;
+       }
 
-       if (pm->node == MAX_NUMNODES)
-               return NULL;
+       return 0;
+}
 
-       *result = &pm->status;
+static int do_move_pages_to_node(struct mm_struct *mm,
+               struct list_head *pagelist, int node)
+{
+       int err;
 
-       if (PageHuge(p))
-               return alloc_huge_page_node(page_hstate(compound_head(p)),
-                                       pm->node);
-       else if (thp_migration_supported() && PageTransHuge(p)) {
-               struct page *thp;
+       if (list_empty(pagelist))
+               return 0;
 
-               thp = alloc_pages_node(pm->node,
-                       (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
-                       HPAGE_PMD_ORDER);
-               if (!thp)
-                       return NULL;
-               prep_transhuge_page(thp);
-               return thp;
-       } else
-               return __alloc_pages_node(pm->node,
-                               GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
+       err = migrate_pages(pagelist, alloc_new_node_page, NULL, node,
+                       MIGRATE_SYNC, MR_SYSCALL);
+       if (err)
+               putback_movable_pages(pagelist);
+       return err;
 }
 
 /*
- * Move a set of pages as indicated in the pm array. The addr
- * field must be set to the virtual address of the page to be moved
- * and the node number must contain a valid target node.
- * The pm array ends with node = MAX_NUMNODES.
+ * Resolves the given address to a struct page, isolates it from the LRU and
+ * puts it to the given pagelist.
+ * Returns -errno if the page cannot be found/isolated or 0 when it has been
+ * queued or the page doesn't need to be migrated because it is already on
+ * the target node
  */
-static int do_move_page_to_node_array(struct mm_struct *mm,
-                                     struct page_to_node *pm,
-                                     int migrate_all)
+static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
+               int node, struct list_head *pagelist, bool migrate_all)
 {
+       struct vm_area_struct *vma;
+       struct page *page;
+       unsigned int follflags;
        int err;
-       struct page_to_node *pp;
-       LIST_HEAD(pagelist);
 
        down_read(&mm->mmap_sem);
+       err = -EFAULT;
+       vma = find_vma(mm, addr);
+       if (!vma || addr < vma->vm_start || !vma_migratable(vma))
+               goto out;
 
-       /*
-        * Build a list of pages to migrate
-        */
-       for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
-               struct vm_area_struct *vma;
-               struct page *page;
-               struct page *head;
-               unsigned int follflags;
-
-               err = -EFAULT;
-               vma = find_vma(mm, pp->addr);
-               if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma))
-                       goto set_status;
-
-               /* FOLL_DUMP to ignore special (like zero) pages */
-               follflags = FOLL_GET | FOLL_DUMP;
-               if (!thp_migration_supported())
-                       follflags |= FOLL_SPLIT;
-               page = follow_page(vma, pp->addr, follflags);
+       /* FOLL_DUMP to ignore special (like zero) pages */
+       follflags = FOLL_GET | FOLL_DUMP;
+       page = follow_page(vma, addr, follflags);
 
-               err = PTR_ERR(page);
-               if (IS_ERR(page))
-                       goto set_status;
+       err = PTR_ERR(page);
+       if (IS_ERR(page))
+               goto out;
 
-               err = -ENOENT;
-               if (!page)
-                       goto set_status;
+       err = -ENOENT;
+       if (!page)
+               goto out;
 
-               err = page_to_nid(page);
+       err = 0;
+       if (page_to_nid(page) == node)
+               goto out_putpage;
 
-               if (err == pp->node)
-                       /*
-                        * Node already in the right place
-                        */
-                       goto put_and_set;
+       err = -EACCES;
+       if (page_mapcount(page) > 1 && !migrate_all)
+               goto out_putpage;
 
-               err = -EACCES;
-               if (page_mapcount(page) > 1 &&
-                               !migrate_all)
-                       goto put_and_set;
-
-               if (PageHuge(page)) {
-                       if (PageHead(page)) {
-                               isolate_huge_page(page, &pagelist);
-                               err = 0;
-                               pp->page = page;
-                       }
-                       goto put_and_set;
+       if (PageHuge(page)) {
+               if (PageHead(page)) {
+                       isolate_huge_page(page, pagelist);
+                       err = 0;
                }
+       } else {
+               struct page *head;
 
-               pp->page = compound_head(page);
                head = compound_head(page);
                err = isolate_lru_page(head);
-               if (!err) {
-                       list_add_tail(&head->lru, &pagelist);
-                       mod_node_page_state(page_pgdat(head),
-                               NR_ISOLATED_ANON + page_is_file_cache(head),
-                               hpage_nr_pages(head));
-               }
-put_and_set:
-               /*
-                * Either remove the duplicate refcount from
-                * isolate_lru_page() or drop the page ref if it was
-                * not isolated.
-                */
-               put_page(page);
-set_status:
-               pp->status = err;
-       }
-
-       err = 0;
-       if (!list_empty(&pagelist)) {
-               err = migrate_pages(&pagelist, new_page_node, NULL,
-                               (unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL);
                if (err)
-                       putback_movable_pages(&pagelist);
-       }
+                       goto out_putpage;
 
+               err = 0;
+               list_add_tail(&head->lru, pagelist);
+               mod_node_page_state(page_pgdat(head),
+                       NR_ISOLATED_ANON + page_is_file_cache(head),
+                       hpage_nr_pages(head));
+       }
+out_putpage:
+       /*
+        * Either remove the duplicate refcount from
+        * isolate_lru_page() or drop the page ref if it was
+        * not isolated.
+        */
+       put_page(page);
+out:
        up_read(&mm->mmap_sem);
        return err;
 }
@@ -1593,79 +1555,79 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
                         const int __user *nodes,
                         int __user *status, int flags)
 {
-       struct page_to_node *pm;
-       unsigned long chunk_nr_pages;
-       unsigned long chunk_start;
-       int err;
-
-       err = -ENOMEM;
-       pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
-       if (!pm)
-               goto out;
+       int current_node = NUMA_NO_NODE;
+       LIST_HEAD(pagelist);
+       int start, i;
+       int err = 0, err1;
 
        migrate_prep();
 
-       /*
-        * Store a chunk of page_to_node array in a page,
-        * but keep the last one as a marker
-        */
-       chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;
-
-       for (chunk_start = 0;
-            chunk_start < nr_pages;
-            chunk_start += chunk_nr_pages) {
-               int j;
+       for (i = start = 0; i < nr_pages; i++) {
+               const void __user *p;
+               unsigned long addr;
+               int node;
 
-               if (chunk_start + chunk_nr_pages > nr_pages)
-                       chunk_nr_pages = nr_pages - chunk_start;
-
-               /* fill the chunk pm with addrs and nodes from user-space */
-               for (j = 0; j < chunk_nr_pages; j++) {
-                       const void __user *p;
-                       int node;
-
-                       err = -EFAULT;
-                       if (get_user(p, pages + j + chunk_start))
-                               goto out_pm;
-                       pm[j].addr = (unsigned long) p;
-
-                       if (get_user(node, nodes + j + chunk_start))
-                               goto out_pm;
-
-                       err = -ENODEV;
-                       if (node < 0 || node >= MAX_NUMNODES)
-                               goto out_pm;
-
-                       if (!node_state(node, N_MEMORY))
-                               goto out_pm;
-
-                       err = -EACCES;
-                       if (!node_isset(node, task_nodes))
-                               goto out_pm;
+               err = -EFAULT;
+               if (get_user(p, pages + i))
+                       goto out_flush;
+               if (get_user(node, nodes + i))
+                       goto out_flush;
+               addr = (unsigned long)p;
+
+               err = -ENODEV;
+               if (node < 0 || node >= MAX_NUMNODES)
+                       goto out_flush;
+               if (!node_state(node, N_MEMORY))
+                       goto out_flush;
 
-                       pm[j].node = node;
+               err = -EACCES;
+               if (!node_isset(node, task_nodes))
+                       goto out_flush;
+
+               if (current_node == NUMA_NO_NODE) {
+                       current_node = node;
+                       start = i;
+               } else if (node != current_node) {
+                       err = do_move_pages_to_node(mm, &pagelist, current_node);
+                       if (err)
+                               goto out;
+                       err = store_status(status, start, current_node, i - start);
+                       if (err)
+                               goto out;
+                       start = i;
+                       current_node = node;
                }
 
-               /* End marker for this chunk */
-               pm[chunk_nr_pages].node = MAX_NUMNODES;
-
-               /* Migrate this chunk */
-               err = do_move_page_to_node_array(mm, pm,
-                                                flags & MPOL_MF_MOVE_ALL);
-               if (err < 0)
-                       goto out_pm;
+               /*
+                * Errors in the page lookup or isolation are not fatal and we simply
+                * report them via status
+                */
+               err = add_page_for_migration(mm, addr, current_node,
+                               &pagelist, flags & MPOL_MF_MOVE_ALL);
+               if (!err)
+                       continue;
 
-               /* Return status information */
-               for (j = 0; j < chunk_nr_pages; j++)
-                       if (put_user(pm[j].status, status + j + chunk_start)) {
-                               err = -EFAULT;
-                               goto out_pm;
-                       }
-       }
-       err = 0;
+               err = store_status(status, i, err, 1);
+               if (err)
+                       goto out_flush;
 
-out_pm:
-       free_page((unsigned long)pm);
+               err = do_move_pages_to_node(mm, &pagelist, current_node);
+               if (err)
+                       goto out;
+               if (i > start) {
+                       err = store_status(status, start, current_node, i - start);
+                       if (err)
+                               goto out;
+               }
+               current_node = NUMA_NO_NODE;
+       }
+out_flush:
+       /* Make sure we do not overwrite the existing error */
+       err1 = do_move_pages_to_node(mm, &pagelist, current_node);
+       if (!err1)
+               err1 = store_status(status, start, current_node, i - start);
+       if (!err)
+               err = err1;
 out:
        return err;
 }
@@ -1866,8 +1828,7 @@ static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
 }
 
 static struct page *alloc_misplaced_dst_page(struct page *page,
-                                          unsigned long data,
-                                          int **result)
+                                          unsigned long data)
 {
        int nid = (int) data;
        struct page *newpage;
@@ -1986,6 +1947,13 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
            (vma->vm_flags & VM_EXEC))
                goto out;
 
+       /*
+        * Also do not migrate dirty pages as not all filesystems can move
+        * dirty pages in MIGRATE_ASYNC mode which is a waste of cycles.
+        */
+       if (page_is_file_cache(page) && PageDirty(page))
+               goto out;
+
        /*
         * Rate-limit the amount of data that is being migrated to a node.
         * Optimal placement is no good if the memory bus is saturated and
@@ -2339,7 +2307,8 @@ again:
                        ptep_get_and_clear(mm, addr, ptep);
 
                        /* Setup special migration page table entry */
-                       entry = make_migration_entry(page, pte_write(pte));
+                       entry = make_migration_entry(page, mpfn &
+                                                    MIGRATE_PFN_WRITE);
                        swp_pte = swp_entry_to_pte(entry);
                        if (pte_soft_dirty(pte))
                                swp_pte = pte_swp_mksoft_dirty(swp_pte);
index f2154fc2548b0e13e7e2a61b86e5720765260d1f..188f195883b90b40d8371e8e04ff5acd4d9d1526 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1342,6 +1342,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
                if (!(file && path_noexec(&file->f_path)))
                        prot |= PROT_EXEC;
 
+       /* force arch specific MAP_FIXED handling in get_unmapped_area */
+       if (flags & MAP_FIXED_NOREPLACE)
+               flags |= MAP_FIXED;
+
        if (!(flags & MAP_FIXED))
                addr = round_hint_to_min(addr);
 
@@ -1365,6 +1369,13 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
        if (offset_in_page(addr))
                return addr;
 
+       if (flags & MAP_FIXED_NOREPLACE) {
+               struct vm_area_struct *vma = find_vma(mm, addr);
+
+               if (vma && vma->vm_start <= addr)
+                       return -EEXIST;
+       }
+
        if (prot == PROT_EXEC) {
                pkey = execute_only_pkey(mm);
                if (pkey < 0)
index c1d6af7455da542b9462cfc5ef8d64fa1d386a4f..625608bc89621a4033751717ebc3e8f0b3ab1f30 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/pkeys.h>
 #include <linux/ksm.h>
 #include <linux/uaccess.h>
+#include <linux/mm_inline.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
@@ -89,6 +90,14 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                                    page_mapcount(page) != 1)
                                        continue;
 
+                               /*
+                                * While migration can move some dirty pages,
+                                * it cannot move them all from MIGRATE_ASYNC
+                                * context.
+                                */
+                               if (page_is_file_cache(page) && PageDirty(page))
+                                       continue;
+
                                /* Avoid TLB flush if possible */
                                if (pte_protnone(oldpte))
                                        continue;
index 586f31261c8328e30106254e09e52fa6e93f410e..5c1a3279e63f865664bafcaf4d3c7f98af697cce 100644 (file)
@@ -2099,7 +2099,8 @@ void __init page_writeback_init(void)
  * so that it can tag pages faster than a dirtying process can create them).
  */
 /*
- * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce tree_lock latency.
+ * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce the i_pages lock
+ * latency.
  */
 void tag_pages_for_writeback(struct address_space *mapping,
                             pgoff_t start, pgoff_t end)
@@ -2109,22 +2110,22 @@ void tag_pages_for_writeback(struct address_space *mapping,
        struct radix_tree_iter iter;
        void **slot;
 
-       spin_lock_irq(&mapping->tree_lock);
-       radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, start,
+       xa_lock_irq(&mapping->i_pages);
+       radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, start,
                                                        PAGECACHE_TAG_DIRTY) {
                if (iter.index > end)
                        break;
-               radix_tree_iter_tag_set(&mapping->page_tree, &iter,
+               radix_tree_iter_tag_set(&mapping->i_pages, &iter,
                                                        PAGECACHE_TAG_TOWRITE);
                tagged++;
                if ((tagged % WRITEBACK_TAG_BATCH) != 0)
                        continue;
                slot = radix_tree_iter_resume(slot, &iter);
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
                cond_resched();
-               spin_lock_irq(&mapping->tree_lock);
+               xa_lock_irq(&mapping->i_pages);
        }
-       spin_unlock_irq(&mapping->tree_lock);
+       xa_unlock_irq(&mapping->i_pages);
 }
 EXPORT_SYMBOL(tag_pages_for_writeback);
 
@@ -2467,13 +2468,13 @@ int __set_page_dirty_nobuffers(struct page *page)
                        return 1;
                }
 
-               spin_lock_irqsave(&mapping->tree_lock, flags);
+               xa_lock_irqsave(&mapping->i_pages, flags);
                BUG_ON(page_mapping(page) != mapping);
                WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
                account_page_dirtied(page, mapping);
-               radix_tree_tag_set(&mapping->page_tree, page_index(page),
+               radix_tree_tag_set(&mapping->i_pages, page_index(page),
                                   PAGECACHE_TAG_DIRTY);
-               spin_unlock_irqrestore(&mapping->tree_lock, flags);
+               xa_unlock_irqrestore(&mapping->i_pages, flags);
                unlock_page_memcg(page);
 
                if (mapping->host) {
@@ -2718,11 +2719,10 @@ int test_clear_page_writeback(struct page *page)
                struct backing_dev_info *bdi = inode_to_bdi(inode);
                unsigned long flags;
 
-               spin_lock_irqsave(&mapping->tree_lock, flags);
+               xa_lock_irqsave(&mapping->i_pages, flags);
                ret = TestClearPageWriteback(page);
                if (ret) {
-                       radix_tree_tag_clear(&mapping->page_tree,
-                                               page_index(page),
+                       radix_tree_tag_clear(&mapping->i_pages, page_index(page),
                                                PAGECACHE_TAG_WRITEBACK);
                        if (bdi_cap_account_writeback(bdi)) {
                                struct bdi_writeback *wb = inode_to_wb(inode);
@@ -2736,7 +2736,7 @@ int test_clear_page_writeback(struct page *page)
                                                     PAGECACHE_TAG_WRITEBACK))
                        sb_clear_inode_writeback(mapping->host);
 
-               spin_unlock_irqrestore(&mapping->tree_lock, flags);
+               xa_unlock_irqrestore(&mapping->i_pages, flags);
        } else {
                ret = TestClearPageWriteback(page);
        }
@@ -2766,7 +2766,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
                struct backing_dev_info *bdi = inode_to_bdi(inode);
                unsigned long flags;
 
-               spin_lock_irqsave(&mapping->tree_lock, flags);
+               xa_lock_irqsave(&mapping->i_pages, flags);
                ret = TestSetPageWriteback(page);
                if (!ret) {
                        bool on_wblist;
@@ -2774,8 +2774,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
                        on_wblist = mapping_tagged(mapping,
                                                   PAGECACHE_TAG_WRITEBACK);
 
-                       radix_tree_tag_set(&mapping->page_tree,
-                                               page_index(page),
+                       radix_tree_tag_set(&mapping->i_pages, page_index(page),
                                                PAGECACHE_TAG_WRITEBACK);
                        if (bdi_cap_account_writeback(bdi))
                                inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
@@ -2789,14 +2788,12 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
                                sb_mark_inode_writeback(mapping->host);
                }
                if (!PageDirty(page))
-                       radix_tree_tag_clear(&mapping->page_tree,
-                                               page_index(page),
+                       radix_tree_tag_clear(&mapping->i_pages, page_index(page),
                                                PAGECACHE_TAG_DIRTY);
                if (!keep_write)
-                       radix_tree_tag_clear(&mapping->page_tree,
-                                               page_index(page),
+                       radix_tree_tag_clear(&mapping->i_pages, page_index(page),
                                                PAGECACHE_TAG_TOWRITE);
-               spin_unlock_irqrestore(&mapping->tree_lock, flags);
+               xa_unlock_irqrestore(&mapping->i_pages, flags);
        } else {
                ret = TestSetPageWriteback(page);
        }
@@ -2816,7 +2813,7 @@ EXPORT_SYMBOL(__test_set_page_writeback);
  */
 int mapping_tagged(struct address_space *mapping, int tag)
 {
-       return radix_tree_tagged(&mapping->page_tree, tag);
+       return radix_tree_tagged(&mapping->i_pages, tag);
 }
 EXPORT_SYMBOL(mapping_tagged);
 
index 0b97b8ece4a9fb43b40303dc1fe1cf4dba7c19b5..905db9d7962fcb1776c0e7ffb1618fb6e4084a75 100644 (file)
@@ -46,7 +46,6 @@
 #include <linux/stop_machine.h>
 #include <linux/sort.h>
 #include <linux/pfn.h>
-#include <xen/xen.h>
 #include <linux/backing-dev.h>
 #include <linux/fault-inject.h>
 #include <linux/page-isolation.h>
@@ -205,17 +204,18 @@ static void __free_pages_ok(struct page *page, unsigned int order);
  * TBD: should special case ZONE_DMA32 machines here - in those we normally
  * don't need any ZONE_NORMAL reservation
  */
-int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
+int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES] = {
 #ifdef CONFIG_ZONE_DMA
-        256,
+       [ZONE_DMA] = 256,
 #endif
 #ifdef CONFIG_ZONE_DMA32
-        256,
+       [ZONE_DMA32] = 256,
 #endif
+       [ZONE_NORMAL] = 32,
 #ifdef CONFIG_HIGHMEM
-        32,
+       [ZONE_HIGHMEM] = 0,
 #endif
-        32,
+       [ZONE_MOVABLE] = 0,
 };
 
 EXPORT_SYMBOL(totalram_pages);
@@ -316,9 +316,6 @@ static inline bool update_defer_init(pg_data_t *pgdat,
        /* Always populate low zones for address-constrained allocations */
        if (zone_end < pgdat_end_pfn(pgdat))
                return true;
-       /* Xen PV domains need page structures early */
-       if (xen_pv_domain())
-               return true;
        (*nr_initialised)++;
        if ((*nr_initialised > pgdat->static_init_pgcnt) &&
            (pfn & (PAGES_PER_SECTION - 1)) == 0) {
@@ -1746,16 +1743,38 @@ void __init page_alloc_init_late(void)
 }
 
 #ifdef CONFIG_CMA
+static void __init adjust_present_page_count(struct page *page, long count)
+{
+       struct zone *zone = page_zone(page);
+
+       /* We don't need to hold a lock since it is boot-up process */
+       zone->present_pages += count;
+}
+
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
 void __init init_cma_reserved_pageblock(struct page *page)
 {
        unsigned i = pageblock_nr_pages;
+       unsigned long pfn = page_to_pfn(page);
        struct page *p = page;
+       int nid = page_to_nid(page);
+
+       /*
+        * ZONE_MOVABLE will steal present pages from other zones by
+        * changing page links so page_zone() is changed. Before that,
+        * we need to adjust previous zone's page count first.
+        */
+       adjust_present_page_count(page, -pageblock_nr_pages);
 
        do {
                __ClearPageReserved(p);
                set_page_count(p, 0);
-       } while (++p, --i);
+
+               /* Steal pages from other zones */
+               set_page_links(p, ZONE_MOVABLE, nid, pfn);
+       } while (++p, ++pfn, --i);
+
+       adjust_present_page_count(page, pageblock_nr_pages);
 
        set_pageblock_migratetype(page, MIGRATE_CMA);
 
@@ -2870,7 +2889,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
                 * exists.
                 */
                watermark = min_wmark_pages(zone) + (1UL << order);
-               if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
+               if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
                        return 0;
 
                __mod_zone_freepage_state(zone, -(1UL << order), mt);
@@ -3146,12 +3165,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
        }
 
 
-#ifdef CONFIG_CMA
-       /* If allocation can't use CMA areas don't use free CMA pages */
-       if (!(alloc_flags & ALLOC_CMA))
-               free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
-#endif
-
        /*
         * Check watermarks for an order-0 allocation request. If these
         * are not met, then a high-order request also cannot go ahead
@@ -3178,10 +3191,8 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
                }
 
 #ifdef CONFIG_CMA
-               if ((alloc_flags & ALLOC_CMA) &&
-                   !list_empty(&area->free_list[MIGRATE_CMA])) {
+               if (!list_empty(&area->free_list[MIGRATE_CMA]))
                        return true;
-               }
 #endif
                if (alloc_harder &&
                        !list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
@@ -3201,13 +3212,6 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
                unsigned long mark, int classzone_idx, unsigned int alloc_flags)
 {
        long free_pages = zone_page_state(z, NR_FREE_PAGES);
-       long cma_pages = 0;
-
-#ifdef CONFIG_CMA
-       /* If allocation can't use CMA areas don't use free CMA pages */
-       if (!(alloc_flags & ALLOC_CMA))
-               cma_pages = zone_page_state(z, NR_FREE_CMA_PAGES);
-#endif
 
        /*
         * Fast check for order-0 only. If this fails then the reserves
@@ -3216,7 +3220,7 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
         * the caller is !atomic then it'll uselessly search the free
         * list. That corner case is then slower but it is harmless.
         */
-       if (!order && (free_pages - cma_pages) > mark + z->lowmem_reserve[classzone_idx])
+       if (!order && free_pages > mark + z->lowmem_reserve[classzone_idx])
                return true;
 
        return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
@@ -3852,10 +3856,6 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
        } else if (unlikely(rt_task(current)) && !in_interrupt())
                alloc_flags |= ALLOC_HARDER;
 
-#ifdef CONFIG_CMA
-       if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
-               alloc_flags |= ALLOC_CMA;
-#endif
        return alloc_flags;
 }
 
@@ -4322,9 +4322,6 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
        if (should_fail_alloc_page(gfp_mask, order))
                return false;
 
-       if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
-               *alloc_flags |= ALLOC_CMA;
-
        return true;
 }
 
@@ -4734,6 +4731,13 @@ long si_mem_available(void)
                     min(global_node_page_state(NR_SLAB_RECLAIMABLE) / 2,
                         wmark_low);
 
+       /*
+        * Part of the kernel memory, which can be released under memory
+        * pressure.
+        */
+       available += global_node_page_state(NR_INDIRECTLY_RECLAIMABLE_BYTES) >>
+               PAGE_SHIFT;
+
        if (available < 0)
                available = 0;
        return available;
@@ -6200,6 +6204,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 {
        enum zone_type j;
        int nid = pgdat->node_id;
+       unsigned long node_end_pfn = 0;
 
        pgdat_resize_init(pgdat);
 #ifdef CONFIG_NUMA_BALANCING
@@ -6227,9 +6232,13 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
                struct zone *zone = pgdat->node_zones + j;
                unsigned long size, realsize, freesize, memmap_pages;
                unsigned long zone_start_pfn = zone->zone_start_pfn;
+               unsigned long movable_size = 0;
 
                size = zone->spanned_pages;
                realsize = freesize = zone->present_pages;
+               if (zone_end_pfn(zone) > node_end_pfn)
+                       node_end_pfn = zone_end_pfn(zone);
+
 
                /*
                 * Adjust freesize so that it accounts for how much memory
@@ -6278,12 +6287,30 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
                zone_seqlock_init(zone);
                zone_pcp_init(zone);
 
-               if (!size)
+               /*
+                * The size of the CMA area is unknown now so we need to
+                * prepare the memory for the usemap at maximum.
+                */
+               if (IS_ENABLED(CONFIG_CMA) && j == ZONE_MOVABLE &&
+                       pgdat->node_spanned_pages) {
+                       movable_size = node_end_pfn - pgdat->node_start_pfn;
+               }
+
+               if (!size && !movable_size)
                        continue;
 
                set_pageblock_order();
-               setup_usemap(pgdat, zone, zone_start_pfn, size);
-               init_currently_empty_zone(zone, zone_start_pfn, size);
+               if (movable_size) {
+                       zone->zone_start_pfn = pgdat->node_start_pfn;
+                       zone->spanned_pages = movable_size;
+                       setup_usemap(pgdat, zone,
+                               pgdat->node_start_pfn, movable_size);
+                       init_currently_empty_zone(zone,
+                               pgdat->node_start_pfn, movable_size);
+               } else {
+                       setup_usemap(pgdat, zone, zone_start_pfn, size);
+                       init_currently_empty_zone(zone, zone_start_pfn, size);
+               }
                memmap_init(size, nid, j, zone_start_pfn);
        }
 }
@@ -7125,13 +7152,15 @@ static void setup_per_zone_lowmem_reserve(void)
                                struct zone *lower_zone;
 
                                idx--;
-
-                               if (sysctl_lowmem_reserve_ratio[idx] < 1)
-                                       sysctl_lowmem_reserve_ratio[idx] = 1;
-
                                lower_zone = pgdat->node_zones + idx;
-                               lower_zone->lowmem_reserve[j] = managed_pages /
-                                       sysctl_lowmem_reserve_ratio[idx];
+
+                               if (sysctl_lowmem_reserve_ratio[idx] < 1) {
+                                       sysctl_lowmem_reserve_ratio[idx] = 0;
+                                       lower_zone->lowmem_reserve[j] = 0;
+                               } else {
+                                       lower_zone->lowmem_reserve[j] =
+                                               managed_pages / sysctl_lowmem_reserve_ratio[idx];
+                               }
                                managed_pages += lower_zone->managed_pages;
                        }
                }
@@ -7922,7 +7951,7 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages)
 }
 #endif
 
-#ifdef CONFIG_MEMORY_HOTPLUG
+#if defined CONFIG_MEMORY_HOTPLUG || defined CONFIG_CMA
 /*
  * The zone indicated has a new number of managed_pages; batch sizes and percpu
  * page high values need to be recalulated.
index 61dee77bb211ea6ee18508f270f5ea07405e0d99..43e0856088467551ae6bb71090d89469ebe3fb8d 100644 (file)
@@ -309,8 +309,7 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
        return pfn < end_pfn ? -EBUSY : 0;
 }
 
-struct page *alloc_migrate_target(struct page *page, unsigned long private,
-                                 int **resultp)
+struct page *alloc_migrate_target(struct page *page, unsigned long private)
 {
        return new_page_nodemask(page, numa_node_id(), &node_states[N_MEMORY]);
 }
index 4d57b4644f9832251660a59796837fbcb4f86d6b..539bbb6c1fad98bf984df74051c8f404e6e37f67 100644 (file)
@@ -175,7 +175,7 @@ int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
                        break;
 
                rcu_read_lock();
-               page = radix_tree_lookup(&mapping->page_tree, page_offset);
+               page = radix_tree_lookup(&mapping->i_pages, page_offset);
                rcu_read_unlock();
                if (page && !radix_tree_exceptional_entry(page))
                        continue;
index 9122787c4947efbe127766f6497f7619c670e0e1..f0dd4e4565bc6bc9117fe8ec9b8a2371d7f7f8b4 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
  *                 mmlist_lock (in mmput, drain_mmlist and others)
  *                 mapping->private_lock (in __set_page_dirty_buffers)
  *                   mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
- *                     mapping->tree_lock (widely used)
+ *                     i_pages lock (widely used)
  *                 inode->i_lock (in set_page_dirty's __mark_inode_dirty)
  *                 bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
  *                   sb_lock (within inode_lock in fs/fs-writeback.c)
- *                   mapping->tree_lock (widely used, in set_page_dirty,
+ *                   i_pages lock (widely used, in set_page_dirty,
  *                             in arch-dependent flush_dcache_mmap_lock,
  *                             within bdi.wb->list_lock in __sync_single_inode)
  *
index 4424fc0c33aaf30e208a57e12e74dcf5d99afbd9..9d6c7e5954153b6b678ff4660b0dfddd143c21fb 100644 (file)
@@ -332,12 +332,12 @@ static int shmem_radix_tree_replace(struct address_space *mapping,
 
        VM_BUG_ON(!expected);
        VM_BUG_ON(!replacement);
-       item = __radix_tree_lookup(&mapping->page_tree, index, &node, &pslot);
+       item = __radix_tree_lookup(&mapping->i_pages, index, &node, &pslot);
        if (!item)
                return -ENOENT;
        if (item != expected)
                return -ENOENT;
-       __radix_tree_replace(&mapping->page_tree, node, pslot,
+       __radix_tree_replace(&mapping->i_pages, node, pslot,
                             replacement, NULL);
        return 0;
 }
@@ -355,7 +355,7 @@ static bool shmem_confirm_swap(struct address_space *mapping,
        void *item;
 
        rcu_read_lock();
-       item = radix_tree_lookup(&mapping->page_tree, index);
+       item = radix_tree_lookup(&mapping->i_pages, index);
        rcu_read_unlock();
        return item == swp_to_radix_entry(swap);
 }
@@ -590,14 +590,14 @@ static int shmem_add_to_page_cache(struct page *page,
        page->mapping = mapping;
        page->index = index;
 
-       spin_lock_irq(&mapping->tree_lock);
+       xa_lock_irq(&mapping->i_pages);
        if (PageTransHuge(page)) {
                void __rcu **results;
                pgoff_t idx;
                int i;
 
                error = 0;
-               if (radix_tree_gang_lookup_slot(&mapping->page_tree,
+               if (radix_tree_gang_lookup_slot(&mapping->i_pages,
                                        &results, &idx, index, 1) &&
                                idx < index + HPAGE_PMD_NR) {
                        error = -EEXIST;
@@ -605,14 +605,14 @@ static int shmem_add_to_page_cache(struct page *page,
 
                if (!error) {
                        for (i = 0; i < HPAGE_PMD_NR; i++) {
-                               error = radix_tree_insert(&mapping->page_tree,
+                               error = radix_tree_insert(&mapping->i_pages,
                                                index + i, page + i);
                                VM_BUG_ON(error);
                        }
                        count_vm_event(THP_FILE_ALLOC);
                }
        } else if (!expected) {
-               error = radix_tree_insert(&mapping->page_tree, index, page);
+               error = radix_tree_insert(&mapping->i_pages, index, page);
        } else {
                error = shmem_radix_tree_replace(mapping, index, expected,
                                                                 page);
@@ -624,10 +624,10 @@ static int shmem_add_to_page_cache(struct page *page,
                        __inc_node_page_state(page, NR_SHMEM_THPS);
                __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
                __mod_node_page_state(page_pgdat(page), NR_SHMEM, nr);
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
        } else {
                page->mapping = NULL;
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
                page_ref_sub(page, nr);
        }
        return error;
@@ -643,13 +643,13 @@ static void shmem_delete_from_page_cache(struct page *page, void *radswap)
 
        VM_BUG_ON_PAGE(PageCompound(page), page);
 
-       spin_lock_irq(&mapping->tree_lock);
+       xa_lock_irq(&mapping->i_pages);
        error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
        page->mapping = NULL;
        mapping->nrpages--;
        __dec_node_page_state(page, NR_FILE_PAGES);
        __dec_node_page_state(page, NR_SHMEM);
-       spin_unlock_irq(&mapping->tree_lock);
+       xa_unlock_irq(&mapping->i_pages);
        put_page(page);
        BUG_ON(error);
 }
@@ -662,9 +662,9 @@ static int shmem_free_swap(struct address_space *mapping,
 {
        void *old;
 
-       spin_lock_irq(&mapping->tree_lock);
-       old = radix_tree_delete_item(&mapping->page_tree, index, radswap);
-       spin_unlock_irq(&mapping->tree_lock);
+       xa_lock_irq(&mapping->i_pages);
+       old = radix_tree_delete_item(&mapping->i_pages, index, radswap);
+       xa_unlock_irq(&mapping->i_pages);
        if (old != radswap)
                return -ENOENT;
        free_swap_and_cache(radix_to_swp_entry(radswap));
@@ -675,7 +675,7 @@ static int shmem_free_swap(struct address_space *mapping,
  * Determine (in bytes) how many of the shmem object's pages mapped by the
  * given offsets are swapped out.
  *
- * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU,
+ * This is safe to call without i_mutex or the i_pages lock thanks to RCU,
  * as long as the inode doesn't go away and racy results are not a problem.
  */
 unsigned long shmem_partial_swap_usage(struct address_space *mapping,
@@ -688,7 +688,7 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
 
        rcu_read_lock();
 
-       radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+       radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
                if (iter.index >= end)
                        break;
 
@@ -717,7 +717,7 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
  * Determine (in bytes) how many of the shmem object's pages mapped by the
  * given vma is swapped out.
  *
- * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU,
+ * This is safe to call without i_mutex or the i_pages lock thanks to RCU,
  * as long as the inode doesn't go away and racy results are not a problem.
  */
 unsigned long shmem_swap_usage(struct vm_area_struct *vma)
@@ -1132,7 +1132,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
        int error = 0;
 
        radswap = swp_to_radix_entry(swap);
-       index = find_swap_entry(&mapping->page_tree, radswap);
+       index = find_swap_entry(&mapping->i_pages, radswap);
        if (index == -1)
                return -EAGAIN; /* tell shmem_unuse we found nothing */
 
@@ -1448,7 +1448,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 
        hindex = round_down(index, HPAGE_PMD_NR);
        rcu_read_lock();
-       if (radix_tree_gang_lookup_slot(&mapping->page_tree, &results, &idx,
+       if (radix_tree_gang_lookup_slot(&mapping->i_pages, &results, &idx,
                                hindex, 1) && idx < hindex + HPAGE_PMD_NR) {
                rcu_read_unlock();
                return NULL;
@@ -1561,14 +1561,14 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
         * Our caller will very soon move newpage out of swapcache, but it's
         * a nice clean interface for us to replace oldpage by newpage there.
         */
-       spin_lock_irq(&swap_mapping->tree_lock);
+       xa_lock_irq(&swap_mapping->i_pages);
        error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
                                                                   newpage);
        if (!error) {
                __inc_node_page_state(newpage, NR_FILE_PAGES);
                __dec_node_page_state(oldpage, NR_FILE_PAGES);
        }
-       spin_unlock_irq(&swap_mapping->tree_lock);
+       xa_unlock_irq(&swap_mapping->i_pages);
 
        if (unlikely(error)) {
                /*
@@ -2634,7 +2634,7 @@ static void shmem_tag_pins(struct address_space *mapping)
        start = 0;
        rcu_read_lock();
 
-       radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+       radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
                page = radix_tree_deref_slot(slot);
                if (!page || radix_tree_exception(page)) {
                        if (radix_tree_deref_retry(page)) {
@@ -2642,10 +2642,10 @@ static void shmem_tag_pins(struct address_space *mapping)
                                continue;
                        }
                } else if (page_count(page) - page_mapcount(page) > 1) {
-                       spin_lock_irq(&mapping->tree_lock);
-                       radix_tree_tag_set(&mapping->page_tree, iter.index,
+                       xa_lock_irq(&mapping->i_pages);
+                       radix_tree_tag_set(&mapping->i_pages, iter.index,
                                           SHMEM_TAG_PINNED);
-                       spin_unlock_irq(&mapping->tree_lock);
+                       xa_unlock_irq(&mapping->i_pages);
                }
 
                if (need_resched()) {
@@ -2677,7 +2677,7 @@ static int shmem_wait_for_pins(struct address_space *mapping)
 
        error = 0;
        for (scan = 0; scan <= LAST_SCAN; scan++) {
-               if (!radix_tree_tagged(&mapping->page_tree, SHMEM_TAG_PINNED))
+               if (!radix_tree_tagged(&mapping->i_pages, SHMEM_TAG_PINNED))
                        break;
 
                if (!scan)
@@ -2687,7 +2687,7 @@ static int shmem_wait_for_pins(struct address_space *mapping)
 
                start = 0;
                rcu_read_lock();
-               radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter,
+               radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter,
                                           start, SHMEM_TAG_PINNED) {
 
                        page = radix_tree_deref_slot(slot);
@@ -2713,10 +2713,10 @@ static int shmem_wait_for_pins(struct address_space *mapping)
                                error = -EBUSY;
                        }
 
-                       spin_lock_irq(&mapping->tree_lock);
-                       radix_tree_tag_clear(&mapping->page_tree,
+                       xa_lock_irq(&mapping->i_pages);
+                       radix_tree_tag_clear(&mapping->i_pages,
                                             iter.index, SHMEM_TAG_PINNED);
-                       spin_unlock_irq(&mapping->tree_lock);
+                       xa_unlock_irq(&mapping->i_pages);
 continue_resched:
                        if (need_resched()) {
                                slot = radix_tree_iter_resume(slot, &iter);
index 4fb037c9878251fb88395e35a96acf1193b3b1b7..44aa7847324ac4f8ea99cfe9e53834d56c4818de 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1363,10 +1363,8 @@ static __always_inline void kfree_hook(void *x)
        kasan_kfree_large(x, _RET_IP_);
 }
 
-static __always_inline void *slab_free_hook(struct kmem_cache *s, void *x)
+static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x)
 {
-       void *freeptr;
-
        kmemleak_free_recursive(x, s->flags);
 
        /*
@@ -1386,17 +1384,12 @@ static __always_inline void *slab_free_hook(struct kmem_cache *s, void *x)
        if (!(s->flags & SLAB_DEBUG_OBJECTS))
                debug_check_no_obj_freed(x, s->object_size);
 
-       freeptr = get_freepointer(s, x);
-       /*
-        * kasan_slab_free() may put x into memory quarantine, delaying its
-        * reuse. In this case the object's freelist pointer is changed.
-        */
-       kasan_slab_free(s, x, _RET_IP_);
-       return freeptr;
+       /* KASAN might put x into memory quarantine, delaying its reuse */
+       return kasan_slab_free(s, x, _RET_IP_);
 }
 
-static inline void slab_free_freelist_hook(struct kmem_cache *s,
-                                          void *head, void *tail)
+static inline bool slab_free_freelist_hook(struct kmem_cache *s,
+                                          void **head, void **tail)
 {
 /*
  * Compiler cannot detect this function can be removed if slab_free_hook()
@@ -1407,13 +1400,33 @@ static inline void slab_free_freelist_hook(struct kmem_cache *s,
        defined(CONFIG_DEBUG_OBJECTS_FREE) ||   \
        defined(CONFIG_KASAN)
 
-       void *object = head;
-       void *tail_obj = tail ? : head;
-       void *freeptr;
+       void *object;
+       void *next = *head;
+       void *old_tail = *tail ? *tail : *head;
+
+       /* Head and tail of the reconstructed freelist */
+       *head = NULL;
+       *tail = NULL;
 
        do {
-               freeptr = slab_free_hook(s, object);
-       } while ((object != tail_obj) && (object = freeptr));
+               object = next;
+               next = get_freepointer(s, object);
+               /* If object's reuse doesn't have to be delayed */
+               if (!slab_free_hook(s, object)) {
+                       /* Move object to the new freelist */
+                       set_freepointer(s, object, *head);
+                       *head = object;
+                       if (!*tail)
+                               *tail = object;
+               }
+       } while (object != old_tail);
+
+       if (*head == *tail)
+               *tail = NULL;
+
+       return *head != NULL;
+#else
+       return true;
 #endif
 }
 
@@ -2968,14 +2981,12 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
                                      void *head, void *tail, int cnt,
                                      unsigned long addr)
 {
-       slab_free_freelist_hook(s, head, tail);
        /*
-        * slab_free_freelist_hook() could have put the items into quarantine.
-        * If so, no need to free them.
+        * With KASAN enabled slab_free_freelist_hook modifies the freelist
+        * to remove objects, whose reuse must be delayed.
         */
-       if (s->flags & SLAB_KASAN && !(s->flags & SLAB_TYPESAFE_BY_RCU))
-               return;
-       do_slab_free(s, page, head, tail, cnt, addr);
+       if (slab_free_freelist_hook(s, &head, &tail))
+               do_slab_free(s, page, head, tail, cnt, addr);
 }
 
 #ifdef CONFIG_KASAN
index f233dccd3b1bb8132628cbdb059b24b1f679eb61..07f9aa2340c3a4b5c0b1138feffa1d901aa7ccae 100644 (file)
@@ -124,10 +124,10 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry)
        SetPageSwapCache(page);
 
        address_space = swap_address_space(entry);
-       spin_lock_irq(&address_space->tree_lock);
+       xa_lock_irq(&address_space->i_pages);
        for (i = 0; i < nr; i++) {
                set_page_private(page + i, entry.val + i);
-               error = radix_tree_insert(&address_space->page_tree,
+               error = radix_tree_insert(&address_space->i_pages,
                                          idx + i, page + i);
                if (unlikely(error))
                        break;
@@ -145,13 +145,13 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry)
                VM_BUG_ON(error == -EEXIST);
                set_page_private(page + i, 0UL);
                while (i--) {
-                       radix_tree_delete(&address_space->page_tree, idx + i);
+                       radix_tree_delete(&address_space->i_pages, idx + i);
                        set_page_private(page + i, 0UL);
                }
                ClearPageSwapCache(page);
                page_ref_sub(page, nr);
        }
-       spin_unlock_irq(&address_space->tree_lock);
+       xa_unlock_irq(&address_space->i_pages);
 
        return error;
 }
@@ -188,7 +188,7 @@ void __delete_from_swap_cache(struct page *page)
        address_space = swap_address_space(entry);
        idx = swp_offset(entry);
        for (i = 0; i < nr; i++) {
-               radix_tree_delete(&address_space->page_tree, idx + i);
+               radix_tree_delete(&address_space->i_pages, idx + i);
                set_page_private(page + i, 0);
        }
        ClearPageSwapCache(page);
@@ -272,9 +272,9 @@ void delete_from_swap_cache(struct page *page)
        entry.val = page_private(page);
 
        address_space = swap_address_space(entry);
-       spin_lock_irq(&address_space->tree_lock);
+       xa_lock_irq(&address_space->i_pages);
        __delete_from_swap_cache(page);
-       spin_unlock_irq(&address_space->tree_lock);
+       xa_unlock_irq(&address_space->i_pages);
 
        put_swap_page(page, entry);
        page_ref_sub(page, hpage_nr_pages(page));
@@ -628,12 +628,11 @@ int init_swap_address_space(unsigned int type, unsigned long nr_pages)
                return -ENOMEM;
        for (i = 0; i < nr; i++) {
                space = spaces + i;
-               INIT_RADIX_TREE(&space->page_tree, GFP_ATOMIC|__GFP_NOWARN);
+               INIT_RADIX_TREE(&space->i_pages, GFP_ATOMIC|__GFP_NOWARN);
                atomic_set(&space->i_mmap_writable, 0);
                space->a_ops = &swap_aops;
                /* swap cache doesn't use writeback related tags */
                mapping_set_no_writeback_tags(space);
-               spin_lock_init(&space->tree_lock);
        }
        nr_swapper_spaces[type] = nr;
        rcu_assign_pointer(swapper_spaces[type], spaces);
index c7a33717d079b1fd03ccf49f857684d3364ac908..cc2cf04d9018ad0c546648dfa6cb8167082bbc99 100644 (file)
@@ -85,7 +85,7 @@ PLIST_HEAD(swap_active_head);
  * is held and the locking order requires swap_lock to be taken
  * before any swap_info_struct->lock.
  */
-struct plist_head *swap_avail_heads;
+static struct plist_head *swap_avail_heads;
 static DEFINE_SPINLOCK(swap_avail_lock);
 
 struct swap_info_struct *swap_info[MAX_SWAPFILES];
@@ -2961,6 +2961,10 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
        maxpages = swp_offset(pte_to_swp_entry(
                        swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
        last_page = swap_header->info.last_page;
+       if (!last_page) {
+               pr_warn("Empty swap-file\n");
+               return 0;
+       }
        if (last_page > maxpages) {
                pr_warn("Truncating oversized swap area, only using %luk out of %luk\n",
                        maxpages << (PAGE_SHIFT - 10),
index c34e2fd4f58391f8a8712373d75c0de7ff2d1f99..1d2fb2dca96fcda760471166b22a09eb4b921418 100644 (file)
@@ -36,11 +36,11 @@ static inline void __clear_shadow_entry(struct address_space *mapping,
        struct radix_tree_node *node;
        void **slot;
 
-       if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot))
+       if (!__radix_tree_lookup(&mapping->i_pages, index, &node, &slot))
                return;
        if (*slot != entry)
                return;
-       __radix_tree_replace(&mapping->page_tree, node, slot, NULL,
+       __radix_tree_replace(&mapping->i_pages, node, slot, NULL,
                             workingset_update_node);
        mapping->nrexceptional--;
 }
@@ -48,9 +48,9 @@ static inline void __clear_shadow_entry(struct address_space *mapping,
 static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
                               void *entry)
 {
-       spin_lock_irq(&mapping->tree_lock);
+       xa_lock_irq(&mapping->i_pages);
        __clear_shadow_entry(mapping, index, entry);
-       spin_unlock_irq(&mapping->tree_lock);
+       xa_unlock_irq(&mapping->i_pages);
 }
 
 /*
@@ -79,7 +79,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
        dax = dax_mapping(mapping);
        lock = !dax && indices[j] < end;
        if (lock)
-               spin_lock_irq(&mapping->tree_lock);
+               xa_lock_irq(&mapping->i_pages);
 
        for (i = j; i < pagevec_count(pvec); i++) {
                struct page *page = pvec->pages[i];
@@ -102,7 +102,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
        }
 
        if (lock)
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_unlock_irq(&mapping->i_pages);
        pvec->nr = j;
 }
 
@@ -518,8 +518,8 @@ void truncate_inode_pages_final(struct address_space *mapping)
                 * modification that does not see AS_EXITING is
                 * completed before starting the final truncate.
                 */
-               spin_lock_irq(&mapping->tree_lock);
-               spin_unlock_irq(&mapping->tree_lock);
+               xa_lock_irq(&mapping->i_pages);
+               xa_unlock_irq(&mapping->i_pages);
 
                truncate_inode_pages(mapping, 0);
        }
@@ -627,13 +627,13 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
        if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
                return 0;
 
-       spin_lock_irqsave(&mapping->tree_lock, flags);
+       xa_lock_irqsave(&mapping->i_pages, flags);
        if (PageDirty(page))
                goto failed;
 
        BUG_ON(page_has_private(page));
        __delete_from_page_cache(page, NULL);
-       spin_unlock_irqrestore(&mapping->tree_lock, flags);
+       xa_unlock_irqrestore(&mapping->i_pages, flags);
 
        if (mapping->a_ops->freepage)
                mapping->a_ops->freepage(page);
@@ -641,7 +641,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
        put_page(page); /* pagecache ref */
        return 1;
 failed:
-       spin_unlock_irqrestore(&mapping->tree_lock, flags);
+       xa_unlock_irqrestore(&mapping->i_pages, flags);
        return 0;
 }
 
index 029fc2f3b395054a08595dca3ec38bae63877261..1fc4fa7576f762bbbf341f056ca6d0be803a423f 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -287,7 +287,7 @@ int vma_is_stack_for_current(struct vm_area_struct *vma)
 }
 
 #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
        mm->mmap_base = TASK_UNMAPPED_BASE;
        mm->get_unmapped_area = arch_get_unmapped_area;
@@ -667,6 +667,13 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
                 */
                free += global_node_page_state(NR_SLAB_RECLAIMABLE);
 
+               /*
+                * Part of the kernel memory, which can be released
+                * under memory pressure.
+                */
+               free += global_node_page_state(
+                       NR_INDIRECTLY_RECLAIMABLE_BYTES) >> PAGE_SHIFT;
+
                /*
                 * Leave reserved pages. The pages are not for anonymous pages.
                 */
index 4390a8d5be41ee497569622e3b0381a851870114..8b920ce3ae02f206f8598d6510986ceef6f0440d 100644 (file)
@@ -116,6 +116,16 @@ struct scan_control {
 
        /* Number of pages freed so far during a call to shrink_zones() */
        unsigned long nr_reclaimed;
+
+       struct {
+               unsigned int dirty;
+               unsigned int unqueued_dirty;
+               unsigned int congested;
+               unsigned int writeback;
+               unsigned int immediate;
+               unsigned int file_taken;
+               unsigned int taken;
+       } nr;
 };
 
 #ifdef ARCH_HAS_PREFETCH
@@ -190,6 +200,29 @@ static bool sane_reclaim(struct scan_control *sc)
 #endif
        return false;
 }
+
+static void set_memcg_congestion(pg_data_t *pgdat,
+                               struct mem_cgroup *memcg,
+                               bool congested)
+{
+       struct mem_cgroup_per_node *mn;
+
+       if (!memcg)
+               return;
+
+       mn = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
+       WRITE_ONCE(mn->congested, congested);
+}
+
+static bool memcg_congested(pg_data_t *pgdat,
+                       struct mem_cgroup *memcg)
+{
+       struct mem_cgroup_per_node *mn;
+
+       mn = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
+       return READ_ONCE(mn->congested);
+
+}
 #else
 static bool global_reclaim(struct scan_control *sc)
 {
@@ -200,6 +233,18 @@ static bool sane_reclaim(struct scan_control *sc)
 {
        return true;
 }
+
+static inline void set_memcg_congestion(struct pglist_data *pgdat,
+                               struct mem_cgroup *memcg, bool congested)
+{
+}
+
+static inline bool memcg_congested(struct pglist_data *pgdat,
+                       struct mem_cgroup *memcg)
+{
+       return false;
+
+}
 #endif
 
 /*
@@ -648,7 +693,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
        BUG_ON(!PageLocked(page));
        BUG_ON(mapping != page_mapping(page));
 
-       spin_lock_irqsave(&mapping->tree_lock, flags);
+       xa_lock_irqsave(&mapping->i_pages, flags);
        /*
         * The non racy check for a busy page.
         *
@@ -672,7 +717,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
         * load is not satisfied before that of page->_refcount.
         *
         * Note that if SetPageDirty is always performed via set_page_dirty,
-        * and thus under tree_lock, then this ordering is not required.
+        * and thus under the i_pages lock, then this ordering is not required.
         */
        if (unlikely(PageTransHuge(page)) && PageSwapCache(page))
                refcount = 1 + HPAGE_PMD_NR;
@@ -690,7 +735,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
                swp_entry_t swap = { .val = page_private(page) };
                mem_cgroup_swapout(page, swap);
                __delete_from_swap_cache(page);
-               spin_unlock_irqrestore(&mapping->tree_lock, flags);
+               xa_unlock_irqrestore(&mapping->i_pages, flags);
                put_swap_page(page, swap);
        } else {
                void (*freepage)(struct page *);
@@ -711,13 +756,13 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
                 * only page cache pages found in these are zero pages
                 * covering holes, and because we don't want to mix DAX
                 * exceptional entries and shadow exceptional entries in the
-                * same page_tree.
+                * same address_space.
                 */
                if (reclaimed && page_is_file_cache(page) &&
                    !mapping_exiting(mapping) && !dax_mapping(mapping))
                        shadow = workingset_eviction(mapping, page);
                __delete_from_page_cache(page, shadow);
-               spin_unlock_irqrestore(&mapping->tree_lock, flags);
+               xa_unlock_irqrestore(&mapping->i_pages, flags);
 
                if (freepage != NULL)
                        freepage(page);
@@ -726,7 +771,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
        return 1;
 
 cannot_free:
-       spin_unlock_irqrestore(&mapping->tree_lock, flags);
+       xa_unlock_irqrestore(&mapping->i_pages, flags);
        return 0;
 }
 
@@ -857,17 +902,6 @@ static void page_check_dirty_writeback(struct page *page,
                mapping->a_ops->is_dirty_writeback(page, dirty, writeback);
 }
 
-struct reclaim_stat {
-       unsigned nr_dirty;
-       unsigned nr_unqueued_dirty;
-       unsigned nr_congested;
-       unsigned nr_writeback;
-       unsigned nr_immediate;
-       unsigned nr_activate;
-       unsigned nr_ref_keep;
-       unsigned nr_unmap_fail;
-};
-
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -926,7 +960,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                        (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
 
                /*
-                * The number of dirty pages determines if a zone is marked
+                * The number of dirty pages determines if a node is marked
                 * reclaim_congested which affects wait_iff_congested. kswapd
                 * will stall and start writing pages if the tail of the LRU
                 * is all dirty unqueued pages.
@@ -1754,23 +1788,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
        mem_cgroup_uncharge_list(&page_list);
        free_unref_page_list(&page_list);
 
-       /*
-        * If reclaim is isolating dirty pages under writeback, it implies
-        * that the long-lived page allocation rate is exceeding the page
-        * laundering rate. Either the global limits are not being effective
-        * at throttling processes due to the page distribution throughout
-        * zones or there is heavy usage of a slow backing device. The
-        * only option is to throttle from reclaim context which is not ideal
-        * as there is no guarantee the dirtying process is throttled in the
-        * same way balance_dirty_pages() manages.
-        *
-        * Once a zone is flagged ZONE_WRITEBACK, kswapd will count the number
-        * of pages under pages flagged for immediate reclaim and stall if any
-        * are encountered in the nr_immediate check below.
-        */
-       if (stat.nr_writeback && stat.nr_writeback == nr_taken)
-               set_bit(PGDAT_WRITEBACK, &pgdat->flags);
-
        /*
         * If dirty pages are scanned that are not queued for IO, it
         * implies that flushers are not doing their job. This can
@@ -1785,48 +1802,17 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
        if (stat.nr_unqueued_dirty == nr_taken)
                wakeup_flusher_threads(WB_REASON_VMSCAN);
 
-       /*
-        * Legacy memcg will stall in page writeback so avoid forcibly
-        * stalling here.
-        */
-       if (sane_reclaim(sc)) {
-               /*
-                * Tag a zone as congested if all the dirty pages scanned were
-                * backed by a congested BDI and wait_iff_congested will stall.
-                */
-               if (stat.nr_dirty && stat.nr_dirty == stat.nr_congested)
-                       set_bit(PGDAT_CONGESTED, &pgdat->flags);
-
-               /* Allow kswapd to start writing pages during reclaim. */
-               if (stat.nr_unqueued_dirty == nr_taken)
-                       set_bit(PGDAT_DIRTY, &pgdat->flags);
-
-               /*
-                * If kswapd scans pages marked marked for immediate
-                * reclaim and under writeback (nr_immediate), it implies
-                * that pages are cycling through the LRU faster than
-                * they are written so also forcibly stall.
-                */
-               if (stat.nr_immediate && current_may_throttle())
-                       congestion_wait(BLK_RW_ASYNC, HZ/10);
-       }
-
-       /*
-        * Stall direct reclaim for IO completions if underlying BDIs or zone
-        * is congested. Allow kswapd to continue until it starts encountering
-        * unqueued dirty pages or cycling through the LRU too quickly.
-        */
-       if (!sc->hibernation_mode && !current_is_kswapd() &&
-           current_may_throttle())
-               wait_iff_congested(pgdat, BLK_RW_ASYNC, HZ/10);
+       sc->nr.dirty += stat.nr_dirty;
+       sc->nr.congested += stat.nr_congested;
+       sc->nr.unqueued_dirty += stat.nr_unqueued_dirty;
+       sc->nr.writeback += stat.nr_writeback;
+       sc->nr.immediate += stat.nr_immediate;
+       sc->nr.taken += nr_taken;
+       if (file)
+               sc->nr.file_taken += nr_taken;
 
        trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
-                       nr_scanned, nr_reclaimed,
-                       stat.nr_dirty,  stat.nr_writeback,
-                       stat.nr_congested, stat.nr_immediate,
-                       stat.nr_activate, stat.nr_ref_keep,
-                       stat.nr_unmap_fail,
-                       sc->priority, file);
+                       nr_scanned, nr_reclaimed, &stat, sc->priority, file);
        return nr_reclaimed;
 }
 
@@ -2507,6 +2493,12 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
        return true;
 }
 
+static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg)
+{
+       return test_bit(PGDAT_CONGESTED, &pgdat->flags) ||
+               (memcg && memcg_congested(pgdat, memcg));
+}
+
 static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 {
        struct reclaim_state *reclaim_state = current->reclaim_state;
@@ -2522,6 +2514,8 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
                unsigned long node_lru_pages = 0;
                struct mem_cgroup *memcg;
 
+               memset(&sc->nr, 0, sizeof(sc->nr));
+
                nr_reclaimed = sc->nr_reclaimed;
                nr_scanned = sc->nr_scanned;
 
@@ -2536,7 +2530,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
                                        sc->memcg_low_skipped = 1;
                                        continue;
                                }
-                               mem_cgroup_event(memcg, MEMCG_LOW);
+                               memcg_memory_event(memcg, MEMCG_LOW);
                        }
 
                        reclaimed = sc->nr_reclaimed;
@@ -2587,6 +2581,67 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
                if (sc->nr_reclaimed - nr_reclaimed)
                        reclaimable = true;
 
+               if (current_is_kswapd()) {
+                       /*
+                        * If reclaim is isolating dirty pages under writeback,
+                        * it implies that the long-lived page allocation rate
+                        * is exceeding the page laundering rate. Either the
+                        * global limits are not being effective at throttling
+                        * processes due to the page distribution throughout
+                        * zones or there is heavy usage of a slow backing
+                        * device. The only option is to throttle from reclaim
+                        * context which is not ideal as there is no guarantee
+                        * the dirtying process is throttled in the same way
+                        * balance_dirty_pages() manages.
+                        *
+                        * Once a node is flagged PGDAT_WRITEBACK, kswapd will
+                        * count the number of pages under pages flagged for
+                        * immediate reclaim and stall if any are encountered
+                        * in the nr_immediate check below.
+                        */
+                       if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
+                               set_bit(PGDAT_WRITEBACK, &pgdat->flags);
+
+                       /*
+                        * Tag a node as congested if all the dirty pages
+                        * scanned were backed by a congested BDI and
+                        * wait_iff_congested will stall.
+                        */
+                       if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
+                               set_bit(PGDAT_CONGESTED, &pgdat->flags);
+
+                       /* Allow kswapd to start writing pages during reclaim.*/
+                       if (sc->nr.unqueued_dirty == sc->nr.file_taken)
+                               set_bit(PGDAT_DIRTY, &pgdat->flags);
+
+                       /*
+                        * If kswapd scans pages marked marked for immediate
+                        * reclaim and under writeback (nr_immediate), it
+                        * implies that pages are cycling through the LRU
+                        * faster than they are written so also forcibly stall.
+                        */
+                       if (sc->nr.immediate)
+                               congestion_wait(BLK_RW_ASYNC, HZ/10);
+               }
+
+               /*
+                * Legacy memcg will stall in page writeback so avoid forcibly
+                * stalling in wait_iff_congested().
+                */
+               if (!global_reclaim(sc) && sane_reclaim(sc) &&
+                   sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
+                       set_memcg_congestion(pgdat, root, true);
+
+               /*
+                * Stall direct reclaim for IO completions if underlying BDIs
+                * and node is congested. Allow kswapd to continue until it
+                * starts encountering unqueued dirty pages or cycling through
+                * the LRU too quickly.
+                */
+               if (!sc->hibernation_mode && !current_is_kswapd() &&
+                  current_may_throttle() && pgdat_memcg_congested(pgdat, root))
+                       wait_iff_congested(BLK_RW_ASYNC, HZ/10);
+
        } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
                                         sc->nr_scanned - nr_scanned, sc));
 
@@ -2802,6 +2857,7 @@ retry:
                        continue;
                last_pgdat = zone->zone_pgdat;
                snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat);
+               set_memcg_congestion(last_pgdat, sc->target_mem_cgroup, false);
        }
 
        delayacct_freepages_end();
@@ -3808,7 +3864,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 
        if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) {
                /*
-                * Free memory by calling shrink zone with increasing
+                * Free memory by calling shrink node with increasing
                 * priorities until we have enough memory freed.
                 */
                do {
index 33581be705f03ee97527ad61921c3339a05524d0..536332e988b872973b1e4dc663eebf0c3f595802 100644 (file)
@@ -1161,6 +1161,7 @@ const char * const vmstat_text[] = {
        "nr_vmscan_immediate_reclaim",
        "nr_dirtied",
        "nr_written",
+       "nr_indirectly_reclaimable",
 
        /* enum writeback_stat_item counters */
        "nr_dirty_threshold",
index b7d616a3bbbe46e1d2036f33948d197a29853f48..40ee02c83978e6a4bd650b2da6d31e5b866c5b80 100644 (file)
@@ -202,7 +202,7 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
  * @mapping: address space the page was backing
  * @page: the page being evicted
  *
- * Returns a shadow entry to be stored in @mapping->page_tree in place
+ * Returns a shadow entry to be stored in @mapping->i_pages in place
  * of the evicted @page so that a later refault can be detected.
  */
 void *workingset_eviction(struct address_space *mapping, struct page *page)
@@ -348,7 +348,7 @@ void workingset_update_node(struct radix_tree_node *node)
         *
         * Avoid acquiring the list_lru lock when the nodes are
         * already where they should be. The list_empty() test is safe
-        * as node->private_list is protected by &mapping->tree_lock.
+        * as node->private_list is protected by the i_pages lock.
         */
        if (node->count && node->count == node->exceptional) {
                if (list_empty(&node->private_list))
@@ -366,7 +366,7 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
        unsigned long nodes;
        unsigned long cache;
 
-       /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
+       /* list_lru lock nests inside the IRQ-safe i_pages lock */
        local_irq_disable();
        nodes = list_lru_shrink_count(&shadow_nodes, sc);
        local_irq_enable();
@@ -419,21 +419,21 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 
        /*
         * Page cache insertions and deletions synchroneously maintain
-        * the shadow node LRU under the mapping->tree_lock and the
+        * the shadow node LRU under the i_pages lock and the
         * lru_lock.  Because the page cache tree is emptied before
         * the inode can be destroyed, holding the lru_lock pins any
         * address_space that has radix tree nodes on the LRU.
         *
-        * We can then safely transition to the mapping->tree_lock to
+        * We can then safely transition to the i_pages lock to
         * pin only the address_space of the particular node we want
         * to reclaim, take the node off-LRU, and drop the lru_lock.
         */
 
        node = container_of(item, struct radix_tree_node, private_list);
-       mapping = container_of(node->root, struct address_space, page_tree);
+       mapping = container_of(node->root, struct address_space, i_pages);
 
        /* Coming from the list, invert the lock order */
-       if (!spin_trylock(&mapping->tree_lock)) {
+       if (!xa_trylock(&mapping->i_pages)) {
                spin_unlock(lru_lock);
                ret = LRU_RETRY;
                goto out;
@@ -468,11 +468,11 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
        if (WARN_ON_ONCE(node->exceptional))
                goto out_invalid;
        inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM);
-       __radix_tree_delete_node(&mapping->page_tree, node,
+       __radix_tree_delete_node(&mapping->i_pages, node,
                                 workingset_lookup_update(mapping));
 
 out_invalid:
-       spin_unlock(&mapping->tree_lock);
+       xa_unlock(&mapping->i_pages);
        ret = LRU_REMOVED_RETRY;
 out:
        local_irq_enable();
@@ -487,7 +487,7 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
 {
        unsigned long ret;
 
-       /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
+       /* list_lru lock nests inside the IRQ-safe i_pages lock */
        local_irq_disable();
        ret = list_lru_shrink_walk(&shadow_nodes, sc, shadow_lru_isolate, NULL);
        local_irq_enable();
@@ -503,7 +503,7 @@ static struct shrinker workingset_shadow_shrinker = {
 
 /*
  * Our list_lru->lock is IRQ-safe as it nests inside the IRQ-safe
- * mapping->tree_lock.
+ * i_pages lock.
  */
 static struct lock_class_key shadow_nodes_key;
 
index f579ad4a8100c29110c0c77ad4744161167e499f..c0bca6153b95d2257333fabe9c1b3eb398d66488 100644 (file)
@@ -467,6 +467,8 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
        spin_lock_init(&pool->lock);
        spin_lock_init(&pool->stale_lock);
        pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
+       if (!pool->unbuddied)
+               goto out_pool;
        for_each_possible_cpu(cpu) {
                struct list_head *unbuddied =
                                per_cpu_ptr(pool->unbuddied, cpu);
@@ -479,7 +481,7 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
        pool->name = name;
        pool->compact_wq = create_singlethread_workqueue(pool->name);
        if (!pool->compact_wq)
-               goto out;
+               goto out_unbuddied;
        pool->release_wq = create_singlethread_workqueue(pool->name);
        if (!pool->release_wq)
                goto out_wq;
@@ -489,8 +491,11 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
 
 out_wq:
        destroy_workqueue(pool->compact_wq);
-out:
+out_unbuddied:
+       free_percpu(pool->unbuddied);
+out_pool:
        kfree(pool);
+out:
        return NULL;
 }
 
@@ -533,7 +538,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
        struct z3fold_header *zhdr = NULL;
        struct page *page = NULL;
        enum buddy bud;
-       bool can_sleep = (gfp & __GFP_RECLAIM) == __GFP_RECLAIM;
+       bool can_sleep = gfpflags_allow_blocking(gfp);
 
        if (!size || (gfp & __GFP_HIGHMEM))
                return -EINVAL;
index a9682534c3779fd12510d7f6a82bf1fcab6c88e9..45ff5dc124cc3f45739c7d47b5c020d60831e9f6 100644 (file)
@@ -749,18 +749,31 @@ static bool conn_use_rpa(struct hci_conn *conn)
 }
 
 static void hci_req_add_le_create_conn(struct hci_request *req,
-                                      struct hci_conn *conn)
+                                      struct hci_conn *conn,
+                                      bdaddr_t *direct_rpa)
 {
        struct hci_cp_le_create_conn cp;
        struct hci_dev *hdev = conn->hdev;
        u8 own_addr_type;
 
-       /* Update random address, but set require_privacy to false so
-        * that we never connect with an non-resolvable address.
+       /* If direct address was provided we use it instead of current
+        * address.
         */
-       if (hci_update_random_address(req, false, conn_use_rpa(conn),
-                                     &own_addr_type))
-               return;
+       if (direct_rpa) {
+               if (bacmp(&req->hdev->random_addr, direct_rpa))
+                       hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6,
+                                                               direct_rpa);
+
+               /* direct address is always RPA */
+               own_addr_type = ADDR_LE_DEV_RANDOM;
+       } else {
+               /* Update random address, but set require_privacy to false so
+                * that we never connect with an non-resolvable address.
+                */
+               if (hci_update_random_address(req, false, conn_use_rpa(conn),
+                                             &own_addr_type))
+                       return;
+       }
 
        memset(&cp, 0, sizeof(cp));
 
@@ -825,7 +838,7 @@ static void hci_req_directed_advertising(struct hci_request *req,
 
 struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
                                u8 dst_type, u8 sec_level, u16 conn_timeout,
-                               u8 role)
+                               u8 role, bdaddr_t *direct_rpa)
 {
        struct hci_conn_params *params;
        struct hci_conn *conn;
@@ -940,7 +953,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
                hci_dev_set_flag(hdev, HCI_LE_SCAN_INTERRUPTED);
        }
 
-       hci_req_add_le_create_conn(&req, conn);
+       hci_req_add_le_create_conn(&req, conn, direct_rpa);
 
 create_conn:
        err = hci_req_run(&req, create_le_conn_complete);
index cd3bbb766c24c1503972e07f8ccfe9b4827ce3f3..139707cd9d352c9c76302b58c40b3148de33d051 100644 (file)
@@ -4648,7 +4648,8 @@ static void hci_le_conn_update_complete_evt(struct hci_dev *hdev,
 /* This function requires the caller holds hdev->lock */
 static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
                                              bdaddr_t *addr,
-                                             u8 addr_type, u8 adv_type)
+                                             u8 addr_type, u8 adv_type,
+                                             bdaddr_t *direct_rpa)
 {
        struct hci_conn *conn;
        struct hci_conn_params *params;
@@ -4699,7 +4700,8 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
        }
 
        conn = hci_connect_le(hdev, addr, addr_type, BT_SECURITY_LOW,
-                             HCI_LE_AUTOCONN_TIMEOUT, HCI_ROLE_MASTER);
+                             HCI_LE_AUTOCONN_TIMEOUT, HCI_ROLE_MASTER,
+                             direct_rpa);
        if (!IS_ERR(conn)) {
                /* If HCI_AUTO_CONN_EXPLICIT is set, conn is already owned
                 * by higher layer that tried to connect, if no then
@@ -4808,8 +4810,13 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
                bdaddr_type = irk->addr_type;
        }
 
-       /* Check if we have been requested to connect to this device */
-       conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, type);
+       /* Check if we have been requested to connect to this device.
+        *
+        * direct_addr is set only for directed advertising reports (it is NULL
+        * for advertising reports) and is already verified to be RPA above.
+        */
+       conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, type,
+                                                               direct_addr);
        if (conn && type == LE_ADV_IND) {
                /* Store report for later inclusion by
                 * mgmt_device_connected
index fc6615d5916524c446a9f4f952f2f8b7b5b67e16..9b7907ebfa01b0332be1528b507465c1a304ebe6 100644 (file)
@@ -7156,7 +7156,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
                        hcon = hci_connect_le(hdev, dst, dst_type,
                                              chan->sec_level,
                                              HCI_LE_CONN_TIMEOUT,
-                                             HCI_ROLE_SLAVE);
+                                             HCI_ROLE_SLAVE, NULL);
                else
                        hcon = hci_connect_le_scan(hdev, dst, dst_type,
                                                   chan->sec_level,
index b4bded4b53960faa19099dbb5c7fd70a31209d83..12bf49772d24b93c90c49ebb87e8770f41bb3918 100644 (file)
@@ -8,6 +8,7 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
        mon_client.o \
        cls_lock_client.o \
        osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
+       striper.o \
        debugfs.o \
        auth.o auth_none.o \
        crypto.o armor.o \
index 4adf07826f4a33086bbc78ce6e6142311fac8d8b..584fdbef2088cf7eaa6ad8fea103dc6aa42b6ce9 100644 (file)
@@ -72,6 +72,7 @@ const char *ceph_msg_type_name(int type)
        case CEPH_MSG_MON_GET_VERSION: return "mon_get_version";
        case CEPH_MSG_MON_GET_VERSION_REPLY: return "mon_get_version_reply";
        case CEPH_MSG_MDS_MAP: return "mds_map";
+       case CEPH_MSG_FS_MAP_USER: return "fs_map_user";
        case CEPH_MSG_CLIENT_SESSION: return "client_session";
        case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
        case CEPH_MSG_CLIENT_REQUEST: return "client_request";
@@ -79,8 +80,13 @@ const char *ceph_msg_type_name(int type)
        case CEPH_MSG_CLIENT_REPLY: return "client_reply";
        case CEPH_MSG_CLIENT_CAPS: return "client_caps";
        case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release";
+       case CEPH_MSG_CLIENT_QUOTA: return "client_quota";
        case CEPH_MSG_CLIENT_SNAP: return "client_snap";
        case CEPH_MSG_CLIENT_LEASE: return "client_lease";
+       case CEPH_MSG_POOLOP_REPLY: return "poolop_reply";
+       case CEPH_MSG_POOLOP: return "poolop";
+       case CEPH_MSG_MON_COMMAND: return "mon_command";
+       case CEPH_MSG_MON_COMMAND_ACK: return "mon_command_ack";
        case CEPH_MSG_OSD_MAP: return "osd_map";
        case CEPH_MSG_OSD_OP: return "osd_op";
        case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
@@ -217,7 +223,7 @@ static int parse_fsid(const char *str, struct ceph_fsid *fsid)
 
        if (i == 16)
                err = 0;
-       dout("parse_fsid ret %d got fsid %pU", err, fsid);
+       dout("parse_fsid ret %d got fsid %pU\n", err, fsid);
        return err;
 }
 
index bf9d079cbafd6e89d56ef5f10d81727bd5cbd42b..02172c408ff28d81a23da4cd7aea12025293611d 100644 (file)
@@ -347,10 +347,12 @@ struct key_type key_type_ceph = {
        .destroy        = ceph_key_destroy,
 };
 
-int ceph_crypto_init(void) {
+int __init ceph_crypto_init(void)
+{
        return register_key_type(&key_type_ceph);
 }
 
-void ceph_crypto_shutdown(void) {
+void ceph_crypto_shutdown(void)
+{
        unregister_key_type(&key_type_ceph);
 }
index 1eef6806aa1a01e1901f7f57d759a2a52c6cd1df..02952605d121871200d437d13a832241b3beb5a4 100644 (file)
@@ -389,7 +389,7 @@ CEPH_DEFINE_SHOW_FUNC(monc_show)
 CEPH_DEFINE_SHOW_FUNC(osdc_show)
 CEPH_DEFINE_SHOW_FUNC(client_options_show)
 
-int ceph_debugfs_init(void)
+int __init ceph_debugfs_init(void)
 {
        ceph_debugfs_dir = debugfs_create_dir("ceph", NULL);
        if (!ceph_debugfs_dir)
@@ -418,7 +418,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
                goto out;
 
        client->monc.debugfs_file = debugfs_create_file("monc",
-                                                     0600,
+                                                     0400,
                                                      client->debugfs_dir,
                                                      client,
                                                      &monc_show_fops);
@@ -426,7 +426,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
                goto out;
 
        client->osdc.debugfs_file = debugfs_create_file("osdc",
-                                                     0600,
+                                                     0400,
                                                      client->debugfs_dir,
                                                      client,
                                                      &osdc_show_fops);
@@ -434,7 +434,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
                goto out;
 
        client->debugfs_monmap = debugfs_create_file("monmap",
-                                       0600,
+                                       0400,
                                        client->debugfs_dir,
                                        client,
                                        &monmap_show_fops);
@@ -442,7 +442,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
                goto out;
 
        client->debugfs_osdmap = debugfs_create_file("osdmap",
-                                       0600,
+                                       0400,
                                        client->debugfs_dir,
                                        client,
                                        &osdmap_show_fops);
@@ -450,7 +450,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
                goto out;
 
        client->debugfs_options = debugfs_create_file("client_options",
-                                       0600,
+                                       0400,
                                        client->debugfs_dir,
                                        client,
                                        &client_options_show_fops);
@@ -477,7 +477,7 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client)
 
 #else  /* CONFIG_DEBUG_FS */
 
-int ceph_debugfs_init(void)
+int __init ceph_debugfs_init(void)
 {
        return 0;
 }
@@ -496,6 +496,3 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client)
 }
 
 #endif  /* CONFIG_DEBUG_FS */
-
-EXPORT_SYMBOL(ceph_debugfs_init);
-EXPORT_SYMBOL(ceph_debugfs_cleanup);
index 8a4d3758030b73d3b9ca24b09f91b1e65be0844e..fcb40c12b1f838e24b2f2a1a58b632522acab8b4 100644 (file)
@@ -277,7 +277,7 @@ static void _ceph_msgr_exit(void)
        ceph_msgr_slab_exit();
 }
 
-int ceph_msgr_init(void)
+int __init ceph_msgr_init(void)
 {
        if (ceph_msgr_slab_init())
                return -ENOMEM;
@@ -299,7 +299,6 @@ int ceph_msgr_init(void)
 
        return -ENOMEM;
 }
-EXPORT_SYMBOL(ceph_msgr_init);
 
 void ceph_msgr_exit(void)
 {
@@ -307,7 +306,6 @@ void ceph_msgr_exit(void)
 
        _ceph_msgr_exit();
 }
-EXPORT_SYMBOL(ceph_msgr_exit);
 
 void ceph_msgr_flush(void)
 {
@@ -839,93 +837,112 @@ static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor,
                                        size_t length)
 {
        struct ceph_msg_data *data = cursor->data;
-       struct bio *bio;
+       struct ceph_bio_iter *it = &cursor->bio_iter;
 
-       BUG_ON(data->type != CEPH_MSG_DATA_BIO);
+       cursor->resid = min_t(size_t, length, data->bio_length);
+       *it = data->bio_pos;
+       if (cursor->resid < it->iter.bi_size)
+               it->iter.bi_size = cursor->resid;
 
-       bio = data->bio;
-       BUG_ON(!bio);
-
-       cursor->resid = min(length, data->bio_length);
-       cursor->bio = bio;
-       cursor->bvec_iter = bio->bi_iter;
-       cursor->last_piece =
-               cursor->resid <= bio_iter_len(bio, cursor->bvec_iter);
+       BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
+       cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
 }
 
 static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
                                                size_t *page_offset,
                                                size_t *length)
 {
-       struct ceph_msg_data *data = cursor->data;
-       struct bio *bio;
-       struct bio_vec bio_vec;
-
-       BUG_ON(data->type != CEPH_MSG_DATA_BIO);
-
-       bio = cursor->bio;
-       BUG_ON(!bio);
-
-       bio_vec = bio_iter_iovec(bio, cursor->bvec_iter);
-
-       *page_offset = (size_t) bio_vec.bv_offset;
-       BUG_ON(*page_offset >= PAGE_SIZE);
-       if (cursor->last_piece) /* pagelist offset is always 0 */
-               *length = cursor->resid;
-       else
-               *length = (size_t) bio_vec.bv_len;
-       BUG_ON(*length > cursor->resid);
-       BUG_ON(*page_offset + *length > PAGE_SIZE);
+       struct bio_vec bv = bio_iter_iovec(cursor->bio_iter.bio,
+                                          cursor->bio_iter.iter);
 
-       return bio_vec.bv_page;
+       *page_offset = bv.bv_offset;
+       *length = bv.bv_len;
+       return bv.bv_page;
 }
 
 static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
                                        size_t bytes)
 {
-       struct bio *bio;
-       struct bio_vec bio_vec;
+       struct ceph_bio_iter *it = &cursor->bio_iter;
 
-       BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO);
+       BUG_ON(bytes > cursor->resid);
+       BUG_ON(bytes > bio_iter_len(it->bio, it->iter));
+       cursor->resid -= bytes;
+       bio_advance_iter(it->bio, &it->iter, bytes);
 
-       bio = cursor->bio;
-       BUG_ON(!bio);
+       if (!cursor->resid) {
+               BUG_ON(!cursor->last_piece);
+               return false;   /* no more data */
+       }
 
-       bio_vec = bio_iter_iovec(bio, cursor->bvec_iter);
+       if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done))
+               return false;   /* more bytes to process in this segment */
 
-       /* Advance the cursor offset */
+       if (!it->iter.bi_size) {
+               it->bio = it->bio->bi_next;
+               it->iter = it->bio->bi_iter;
+               if (cursor->resid < it->iter.bi_size)
+                       it->iter.bi_size = cursor->resid;
+       }
 
-       BUG_ON(cursor->resid < bytes);
-       cursor->resid -= bytes;
+       BUG_ON(cursor->last_piece);
+       BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
+       cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
+       return true;
+}
+#endif /* CONFIG_BLOCK */
 
-       bio_advance_iter(bio, &cursor->bvec_iter, bytes);
+static void ceph_msg_data_bvecs_cursor_init(struct ceph_msg_data_cursor *cursor,
+                                       size_t length)
+{
+       struct ceph_msg_data *data = cursor->data;
+       struct bio_vec *bvecs = data->bvec_pos.bvecs;
 
-       if (bytes < bio_vec.bv_len)
-               return false;   /* more bytes to process in this segment */
+       cursor->resid = min_t(size_t, length, data->bvec_pos.iter.bi_size);
+       cursor->bvec_iter = data->bvec_pos.iter;
+       cursor->bvec_iter.bi_size = cursor->resid;
 
-       /* Move on to the next segment, and possibly the next bio */
+       BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter));
+       cursor->last_piece =
+           cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter);
+}
 
-       if (!cursor->bvec_iter.bi_size) {
-               bio = bio->bi_next;
-               cursor->bio = bio;
-               if (bio)
-                       cursor->bvec_iter = bio->bi_iter;
-               else
-                       memset(&cursor->bvec_iter, 0,
-                              sizeof(cursor->bvec_iter));
-       }
+static struct page *ceph_msg_data_bvecs_next(struct ceph_msg_data_cursor *cursor,
+                                               size_t *page_offset,
+                                               size_t *length)
+{
+       struct bio_vec bv = bvec_iter_bvec(cursor->data->bvec_pos.bvecs,
+                                          cursor->bvec_iter);
+
+       *page_offset = bv.bv_offset;
+       *length = bv.bv_len;
+       return bv.bv_page;
+}
+
+static bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor,
+                                       size_t bytes)
+{
+       struct bio_vec *bvecs = cursor->data->bvec_pos.bvecs;
+
+       BUG_ON(bytes > cursor->resid);
+       BUG_ON(bytes > bvec_iter_len(bvecs, cursor->bvec_iter));
+       cursor->resid -= bytes;
+       bvec_iter_advance(bvecs, &cursor->bvec_iter, bytes);
 
-       if (!cursor->last_piece) {
-               BUG_ON(!cursor->resid);
-               BUG_ON(!bio);
-               /* A short read is OK, so use <= rather than == */
-               if (cursor->resid <= bio_iter_len(bio, cursor->bvec_iter))
-                       cursor->last_piece = true;
+       if (!cursor->resid) {
+               BUG_ON(!cursor->last_piece);
+               return false;   /* no more data */
        }
 
+       if (!bytes || cursor->bvec_iter.bi_bvec_done)
+               return false;   /* more bytes to process in this segment */
+
+       BUG_ON(cursor->last_piece);
+       BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter));
+       cursor->last_piece =
+           cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter);
        return true;
 }
-#endif /* CONFIG_BLOCK */
 
 /*
  * For a page array, a piece comes from the first page in the array
@@ -1110,6 +1127,9 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
                ceph_msg_data_bio_cursor_init(cursor, length);
                break;
 #endif /* CONFIG_BLOCK */
+       case CEPH_MSG_DATA_BVECS:
+               ceph_msg_data_bvecs_cursor_init(cursor, length);
+               break;
        case CEPH_MSG_DATA_NONE:
        default:
                /* BUG(); */
@@ -1158,14 +1178,19 @@ static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
                page = ceph_msg_data_bio_next(cursor, page_offset, length);
                break;
 #endif /* CONFIG_BLOCK */
+       case CEPH_MSG_DATA_BVECS:
+               page = ceph_msg_data_bvecs_next(cursor, page_offset, length);
+               break;
        case CEPH_MSG_DATA_NONE:
        default:
                page = NULL;
                break;
        }
+
        BUG_ON(!page);
        BUG_ON(*page_offset + *length > PAGE_SIZE);
        BUG_ON(!*length);
+       BUG_ON(*length > cursor->resid);
        if (last_piece)
                *last_piece = cursor->last_piece;
 
@@ -1194,6 +1219,9 @@ static void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
                new_piece = ceph_msg_data_bio_advance(cursor, bytes);
                break;
 #endif /* CONFIG_BLOCK */
+       case CEPH_MSG_DATA_BVECS:
+               new_piece = ceph_msg_data_bvecs_advance(cursor, bytes);
+               break;
        case CEPH_MSG_DATA_NONE:
        default:
                BUG();
@@ -1575,13 +1603,18 @@ static int write_partial_message_data(struct ceph_connection *con)
         * been revoked, so use the zero page.
         */
        crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0;
-       while (cursor->resid) {
+       while (cursor->total_resid) {
                struct page *page;
                size_t page_offset;
                size_t length;
                bool last_piece;
                int ret;
 
+               if (!cursor->resid) {
+                       ceph_msg_data_advance(cursor, 0);
+                       continue;
+               }
+
                page = ceph_msg_data_next(cursor, &page_offset, &length,
                                          &last_piece);
                ret = ceph_tcp_sendpage(con->sock, page, page_offset,
@@ -2297,7 +2330,12 @@ static int read_partial_msg_data(struct ceph_connection *con)
 
        if (do_datacrc)
                crc = con->in_data_crc;
-       while (cursor->resid) {
+       while (cursor->total_resid) {
+               if (!cursor->resid) {
+                       ceph_msg_data_advance(cursor, 0);
+                       continue;
+               }
+
                page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
                ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
                if (ret <= 0) {
@@ -3262,16 +3300,14 @@ void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
 EXPORT_SYMBOL(ceph_msg_data_add_pagelist);
 
 #ifdef CONFIG_BLOCK
-void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
-               size_t length)
+void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
+                          u32 length)
 {
        struct ceph_msg_data *data;
 
-       BUG_ON(!bio);
-
        data = ceph_msg_data_create(CEPH_MSG_DATA_BIO);
        BUG_ON(!data);
-       data->bio = bio;
+       data->bio_pos = *bio_pos;
        data->bio_length = length;
 
        list_add_tail(&data->links, &msg->data);
@@ -3280,6 +3316,20 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
 EXPORT_SYMBOL(ceph_msg_data_add_bio);
 #endif /* CONFIG_BLOCK */
 
+void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
+                            struct ceph_bvec_iter *bvec_pos)
+{
+       struct ceph_msg_data *data;
+
+       data = ceph_msg_data_create(CEPH_MSG_DATA_BVECS);
+       BUG_ON(!data);
+       data->bvec_pos = *bvec_pos;
+
+       list_add_tail(&data->links, &msg->data);
+       msg->data_length += bvec_pos->iter.bi_size;
+}
+EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
+
 /*
  * construct a new message with given type, size
  * the new msg has a ref count of 1.
index 1547107f48544e9690319e4fe9415968448a9f87..b3dac24412d34cbe2e3616371db9fe9838ce550f 100644 (file)
@@ -60,7 +60,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
        num_mon = ceph_decode_32(&p);
        ceph_decode_need(&p, end, num_mon*sizeof(m->mon_inst[0]), bad);
 
-       if (num_mon >= CEPH_MAX_MON)
+       if (num_mon > CEPH_MAX_MON)
                goto bad;
        m = kmalloc(sizeof(*m) + sizeof(m->mon_inst[0])*num_mon, GFP_NOFS);
        if (m == NULL)
index 2814dba5902d7a862a0ffb42b488af473a43fb59..ea2a6c9fb7cef01b54eb86a1a7c580625feb4b1e 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/ceph/decode.h>
 #include <linux/ceph/auth.h>
 #include <linux/ceph/pagelist.h>
+#include <linux/ceph/striper.h>
 
 #define OSD_OPREPLY_FRONT_LEN  512
 
@@ -103,13 +104,12 @@ static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen,
                        u64 *objnum, u64 *objoff, u64 *objlen)
 {
        u64 orig_len = *plen;
-       int r;
+       u32 xlen;
 
        /* object extent? */
-       r = ceph_calc_file_object_mapping(layout, off, orig_len, objnum,
-                                         objoff, objlen);
-       if (r < 0)
-               return r;
+       ceph_calc_file_object_mapping(layout, off, orig_len, objnum,
+                                         objoff, &xlen);
+       *objlen = xlen;
        if (*objlen < orig_len) {
                *plen = *objlen;
                dout(" skipping last %llu, final file extent %llu~%llu\n",
@@ -117,7 +117,6 @@ static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen,
        }
 
        dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen);
-
        return 0;
 }
 
@@ -148,14 +147,22 @@ static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data,
 
 #ifdef CONFIG_BLOCK
 static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
-                       struct bio *bio, size_t bio_length)
+                                  struct ceph_bio_iter *bio_pos,
+                                  u32 bio_length)
 {
        osd_data->type = CEPH_OSD_DATA_TYPE_BIO;
-       osd_data->bio = bio;
+       osd_data->bio_pos = *bio_pos;
        osd_data->bio_length = bio_length;
 }
 #endif /* CONFIG_BLOCK */
 
+static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data,
+                                    struct ceph_bvec_iter *bvec_pos)
+{
+       osd_data->type = CEPH_OSD_DATA_TYPE_BVECS;
+       osd_data->bvec_pos = *bvec_pos;
+}
+
 #define osd_req_op_data(oreq, whch, typ, fld)                          \
 ({                                                                     \
        struct ceph_osd_request *__oreq = (oreq);                       \
@@ -218,16 +225,29 @@ EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist);
 
 #ifdef CONFIG_BLOCK
 void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
-                       unsigned int which, struct bio *bio, size_t bio_length)
+                                   unsigned int which,
+                                   struct ceph_bio_iter *bio_pos,
+                                   u32 bio_length)
 {
        struct ceph_osd_data *osd_data;
 
        osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
-       ceph_osd_data_bio_init(osd_data, bio, bio_length);
+       ceph_osd_data_bio_init(osd_data, bio_pos, bio_length);
 }
 EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio);
 #endif /* CONFIG_BLOCK */
 
+void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
+                                        unsigned int which,
+                                        struct ceph_bvec_iter *bvec_pos)
+{
+       struct ceph_osd_data *osd_data;
+
+       osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+       ceph_osd_data_bvecs_init(osd_data, bvec_pos);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos);
+
 static void osd_req_op_cls_request_info_pagelist(
                        struct ceph_osd_request *osd_req,
                        unsigned int which, struct ceph_pagelist *pagelist)
@@ -265,6 +285,23 @@ void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req,
 }
 EXPORT_SYMBOL(osd_req_op_cls_request_data_pages);
 
+void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
+                                      unsigned int which,
+                                      struct bio_vec *bvecs, u32 bytes)
+{
+       struct ceph_osd_data *osd_data;
+       struct ceph_bvec_iter it = {
+               .bvecs = bvecs,
+               .iter = { .bi_size = bytes },
+       };
+
+       osd_data = osd_req_op_data(osd_req, which, cls, request_data);
+       ceph_osd_data_bvecs_init(osd_data, &it);
+       osd_req->r_ops[which].cls.indata_len += bytes;
+       osd_req->r_ops[which].indata_len += bytes;
+}
+EXPORT_SYMBOL(osd_req_op_cls_request_data_bvecs);
+
 void osd_req_op_cls_response_data_pages(struct ceph_osd_request *osd_req,
                        unsigned int which, struct page **pages, u64 length,
                        u32 alignment, bool pages_from_pool, bool own_pages)
@@ -290,6 +327,8 @@ static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data)
        case CEPH_OSD_DATA_TYPE_BIO:
                return (u64)osd_data->bio_length;
 #endif /* CONFIG_BLOCK */
+       case CEPH_OSD_DATA_TYPE_BVECS:
+               return osd_data->bvec_pos.iter.bi_size;
        default:
                WARN(true, "unrecognized data type %d\n", (int)osd_data->type);
                return 0;
@@ -828,8 +867,10 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
                ceph_msg_data_add_pagelist(msg, osd_data->pagelist);
 #ifdef CONFIG_BLOCK
        } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) {
-               ceph_msg_data_add_bio(msg, osd_data->bio, length);
+               ceph_msg_data_add_bio(msg, &osd_data->bio_pos, length);
 #endif
+       } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BVECS) {
+               ceph_msg_data_add_bvecs(msg, &osd_data->bvec_pos);
        } else {
                BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);
        }
@@ -5065,7 +5106,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 }
 EXPORT_SYMBOL(ceph_osdc_writepages);
 
-int ceph_osdc_setup(void)
+int __init ceph_osdc_setup(void)
 {
        size_t size = sizeof(struct ceph_osd_request) +
            CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op);
@@ -5076,7 +5117,6 @@ int ceph_osdc_setup(void)
 
        return ceph_osd_request_cache ? 0 : -ENOMEM;
 }
-EXPORT_SYMBOL(ceph_osdc_setup);
 
 void ceph_osdc_cleanup(void)
 {
@@ -5084,7 +5124,6 @@ void ceph_osdc_cleanup(void)
        kmem_cache_destroy(ceph_osd_request_cache);
        ceph_osd_request_cache = NULL;
 }
-EXPORT_SYMBOL(ceph_osdc_cleanup);
 
 /*
  * handle incoming message
index 0da27c66349a7a378bc1697de00611618f034f1f..9645ffd6acfb24b1432ad69f655d1caa09741fcf 100644 (file)
@@ -4,7 +4,6 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <asm/div64.h>
 
 #include <linux/ceph/libceph.h>
 #include <linux/ceph/osdmap.h>
@@ -2140,76 +2139,6 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting,
        return false;
 }
 
-/*
- * calculate file layout from given offset, length.
- * fill in correct oid, logical length, and object extent
- * offset, length.
- *
- * for now, we write only a single su, until we can
- * pass a stride back to the caller.
- */
-int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
-                                  u64 off, u64 len,
-                                  u64 *ono,
-                                  u64 *oxoff, u64 *oxlen)
-{
-       u32 osize = layout->object_size;
-       u32 su = layout->stripe_unit;
-       u32 sc = layout->stripe_count;
-       u32 bl, stripeno, stripepos, objsetno;
-       u32 su_per_object;
-       u64 t, su_offset;
-
-       dout("mapping %llu~%llu  osize %u fl_su %u\n", off, len,
-            osize, su);
-       if (su == 0 || sc == 0)
-               goto invalid;
-       su_per_object = osize / su;
-       if (su_per_object == 0)
-               goto invalid;
-       dout("osize %u / su %u = su_per_object %u\n", osize, su,
-            su_per_object);
-
-       if ((su & ~PAGE_MASK) != 0)
-               goto invalid;
-
-       /* bl = *off / su; */
-       t = off;
-       do_div(t, su);
-       bl = t;
-       dout("off %llu / su %u = bl %u\n", off, su, bl);
-
-       stripeno = bl / sc;
-       stripepos = bl % sc;
-       objsetno = stripeno / su_per_object;
-
-       *ono = objsetno * sc + stripepos;
-       dout("objset %u * sc %u = ono %u\n", objsetno, sc, (unsigned int)*ono);
-
-       /* *oxoff = *off % layout->fl_stripe_unit;  # offset in su */
-       t = off;
-       su_offset = do_div(t, su);
-       *oxoff = su_offset + (stripeno % su_per_object) * su;
-
-       /*
-        * Calculate the length of the extent being written to the selected
-        * object. This is the minimum of the full length requested (len) or
-        * the remainder of the current stripe being written to.
-        */
-       *oxlen = min_t(u64, len, su - su_offset);
-
-       dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
-       return 0;
-
-invalid:
-       dout(" invalid layout\n");
-       *ono = 0;
-       *oxoff = 0;
-       *oxlen = 0;
-       return -EINVAL;
-}
-EXPORT_SYMBOL(ceph_calc_file_object_mapping);
-
 /*
  * Map an object into a PG.
  *
diff --git a/net/ceph/striper.c b/net/ceph/striper.c
new file mode 100644 (file)
index 0000000..c36462d
--- /dev/null
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/math64.h>
+#include <linux/slab.h>
+
+#include <linux/ceph/striper.h>
+#include <linux/ceph/types.h>
+
+/*
+ * Map a file extent to a stripe unit within an object.
+ * Fill in objno, offset into object, and object extent length (i.e. the
+ * number of bytes mapped, less than or equal to @l->stripe_unit).
+ *
+ * Example for stripe_count = 3, stripes_per_object = 4:
+ *
+ * blockno   |  0  3  6  9 |  1  4  7 10 |  2  5  8 11 | 12 15 18 21 | 13 16 19
+ * stripeno  |  0  1  2  3 |  0  1  2  3 |  0  1  2  3 |  4  5  6  7 |  4  5  6
+ * stripepos |      0      |      1      |      2      |      0      |      1
+ * objno     |      0      |      1      |      2      |      3      |      4
+ * objsetno  |                    0                    |                    1
+ */
+void ceph_calc_file_object_mapping(struct ceph_file_layout *l,
+                                  u64 off, u64 len,
+                                  u64 *objno, u64 *objoff, u32 *xlen)
+{
+       u32 stripes_per_object = l->object_size / l->stripe_unit;
+       u64 blockno;    /* which su in the file (i.e. globally) */
+       u32 blockoff;   /* offset into su */
+       u64 stripeno;   /* which stripe */
+       u32 stripepos;  /* which su in the stripe,
+                          which object in the object set */
+       u64 objsetno;   /* which object set */
+       u32 objsetpos;  /* which stripe in the object set */
+
+       blockno = div_u64_rem(off, l->stripe_unit, &blockoff);
+       stripeno = div_u64_rem(blockno, l->stripe_count, &stripepos);
+       objsetno = div_u64_rem(stripeno, stripes_per_object, &objsetpos);
+
+       *objno = objsetno * l->stripe_count + stripepos;
+       *objoff = objsetpos * l->stripe_unit + blockoff;
+       *xlen = min_t(u64, len, l->stripe_unit - blockoff);
+}
+EXPORT_SYMBOL(ceph_calc_file_object_mapping);
+
+/*
+ * Return the last extent with given objno (@object_extents is sorted
+ * by objno).  If not found, return NULL and set @add_pos so that the
+ * new extent can be added with list_add(add_pos, new_ex).
+ */
+static struct ceph_object_extent *
+lookup_last(struct list_head *object_extents, u64 objno,
+           struct list_head **add_pos)
+{
+       struct list_head *pos;
+
+       list_for_each_prev(pos, object_extents) {
+               struct ceph_object_extent *ex =
+                   list_entry(pos, typeof(*ex), oe_item);
+
+               if (ex->oe_objno == objno)
+                       return ex;
+
+               if (ex->oe_objno < objno)
+                       break;
+       }
+
+       *add_pos = pos;
+       return NULL;
+}
+
+static struct ceph_object_extent *
+lookup_containing(struct list_head *object_extents, u64 objno,
+                 u64 objoff, u32 xlen)
+{
+       struct ceph_object_extent *ex;
+
+       list_for_each_entry(ex, object_extents, oe_item) {
+               if (ex->oe_objno == objno &&
+                   ex->oe_off <= objoff &&
+                   ex->oe_off + ex->oe_len >= objoff + xlen) /* paranoia */
+                       return ex;
+
+               if (ex->oe_objno > objno)
+                       break;
+       }
+
+       return NULL;
+}
+
+/*
+ * Map a file extent to a sorted list of object extents.
+ *
+ * We want only one (or as few as possible) object extents per object.
+ * Adjacent object extents will be merged together, each returned object
+ * extent may reverse map to multiple different file extents.
+ *
+ * Call @alloc_fn for each new object extent and @action_fn for each
+ * mapped stripe unit, whether it was merged into an already allocated
+ * object extent or started a new object extent.
+ *
+ * Newly allocated object extents are added to @object_extents.
+ * To keep @object_extents sorted, successive calls to this function
+ * must map successive file extents (i.e. the list of file extents that
+ * are mapped using the same @object_extents must be sorted).
+ *
+ * The caller is responsible for @object_extents.
+ */
+int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len,
+                        struct list_head *object_extents,
+                        struct ceph_object_extent *alloc_fn(void *arg),
+                        void *alloc_arg,
+                        ceph_object_extent_fn_t action_fn,
+                        void *action_arg)
+{
+       struct ceph_object_extent *last_ex, *ex;
+
+       while (len) {
+               struct list_head *add_pos = NULL;
+               u64 objno, objoff;
+               u32 xlen;
+
+               ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
+                                             &xlen);
+
+               last_ex = lookup_last(object_extents, objno, &add_pos);
+               if (!last_ex || last_ex->oe_off + last_ex->oe_len != objoff) {
+                       ex = alloc_fn(alloc_arg);
+                       if (!ex)
+                               return -ENOMEM;
+
+                       ex->oe_objno = objno;
+                       ex->oe_off = objoff;
+                       ex->oe_len = xlen;
+                       if (action_fn)
+                               action_fn(ex, xlen, action_arg);
+
+                       if (!last_ex)
+                               list_add(&ex->oe_item, add_pos);
+                       else
+                               list_add(&ex->oe_item, &last_ex->oe_item);
+               } else {
+                       last_ex->oe_len += xlen;
+                       if (action_fn)
+                               action_fn(last_ex, xlen, action_arg);
+               }
+
+               off += xlen;
+               len -= xlen;
+       }
+
+       for (last_ex = list_first_entry(object_extents, typeof(*ex), oe_item),
+            ex = list_next_entry(last_ex, oe_item);
+            &ex->oe_item != object_extents;
+            last_ex = ex, ex = list_next_entry(ex, oe_item)) {
+               if (last_ex->oe_objno > ex->oe_objno ||
+                   (last_ex->oe_objno == ex->oe_objno &&
+                    last_ex->oe_off + last_ex->oe_len >= ex->oe_off)) {
+                       WARN(1, "%s: object_extents list not sorted!\n",
+                            __func__);
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(ceph_file_to_extents);
+
+/*
+ * A stripped down, non-allocating version of ceph_file_to_extents(),
+ * for when @object_extents is already populated.
+ */
+int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len,
+                        struct list_head *object_extents,
+                        ceph_object_extent_fn_t action_fn,
+                        void *action_arg)
+{
+       while (len) {
+               struct ceph_object_extent *ex;
+               u64 objno, objoff;
+               u32 xlen;
+
+               ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
+                                             &xlen);
+
+               ex = lookup_containing(object_extents, objno, objoff, xlen);
+               if (!ex) {
+                       WARN(1, "%s: objno %llu %llu~%u not found!\n",
+                            __func__, objno, objoff, xlen);
+                       return -EINVAL;
+               }
+
+               action_fn(ex, xlen, action_arg);
+
+               off += xlen;
+               len -= xlen;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(ceph_iterate_extents);
+
+/*
+ * Reverse map an object extent to a sorted list of file extents.
+ *
+ * On success, the caller is responsible for:
+ *
+ *     kfree(file_extents)
+ */
+int ceph_extent_to_file(struct ceph_file_layout *l,
+                       u64 objno, u64 objoff, u64 objlen,
+                       struct ceph_file_extent **file_extents,
+                       u32 *num_file_extents)
+{
+       u32 stripes_per_object = l->object_size / l->stripe_unit;
+       u64 blockno;    /* which su */
+       u32 blockoff;   /* offset into su */
+       u64 stripeno;   /* which stripe */
+       u32 stripepos;  /* which su in the stripe,
+                          which object in the object set */
+       u64 objsetno;   /* which object set */
+       u32 i = 0;
+
+       if (!objlen) {
+               *file_extents = NULL;
+               *num_file_extents = 0;
+               return 0;
+       }
+
+       *num_file_extents = DIV_ROUND_UP_ULL(objoff + objlen, l->stripe_unit) -
+                                    DIV_ROUND_DOWN_ULL(objoff, l->stripe_unit);
+       *file_extents = kmalloc_array(*num_file_extents, sizeof(**file_extents),
+                                     GFP_NOIO);
+       if (!*file_extents)
+               return -ENOMEM;
+
+       div_u64_rem(objoff, l->stripe_unit, &blockoff);
+       while (objlen) {
+               u64 off, len;
+
+               objsetno = div_u64_rem(objno, l->stripe_count, &stripepos);
+               stripeno = div_u64(objoff, l->stripe_unit) +
+                                               objsetno * stripes_per_object;
+               blockno = stripeno * l->stripe_count + stripepos;
+               off = blockno * l->stripe_unit + blockoff;
+               len = min_t(u64, objlen, l->stripe_unit - blockoff);
+
+               (*file_extents)[i].fe_off = off;
+               (*file_extents)[i].fe_len = len;
+
+               blockoff = 0;
+               objoff += len;
+               objlen -= len;
+               i++;
+       }
+
+       BUG_ON(i != *num_file_extents);
+       return 0;
+}
+EXPORT_SYMBOL(ceph_extent_to_file);
index 9b04a9fd1dfd0e065a7fe798dd840a07f0e0a4df..969462ebb296250fe5f3b7c4621e9ba9720a2dbe 100644 (file)
@@ -1027,7 +1027,7 @@ bool dev_valid_name(const char *name)
 {
        if (*name == '\0')
                return false;
-       if (strlen(name) >= IFNAMSIZ)
+       if (strnlen(name, IFNAMSIZ) == IFNAMSIZ)
                return false;
        if (!strcmp(name, ".") || !strcmp(name, ".."))
                return false;
index c0548d268e1a2ffa381082324cd5497c1fd49ffb..e3e6a3e2ca22a6ee634894e897b089773c820a9c 100644 (file)
@@ -57,8 +57,8 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
                return -EINVAL;
 
        list_for_each_entry(ha, &list->list, list) {
-               if (!memcmp(ha->addr, addr, addr_len) &&
-                   ha->type == addr_type) {
+               if (ha->type == addr_type &&
+                   !memcmp(ha->addr, addr, addr_len)) {
                        if (global) {
                                /* check if addr is already used as global */
                                if (ha->global_use)
index 9236e421bd627392076659c7dede568c36d00e18..ad1317376798cc79735e491b86f7f0e4790ac432 100644 (file)
@@ -2405,6 +2405,16 @@ devlink_resource_size_params_put(struct devlink_resource *resource,
        return 0;
 }
 
+static int devlink_resource_occ_put(struct devlink_resource *resource,
+                                   struct sk_buff *skb)
+{
+       if (!resource->occ_get)
+               return 0;
+       return nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
+                                resource->occ_get(resource->occ_get_priv),
+                                DEVLINK_ATTR_PAD);
+}
+
 static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
                                struct devlink_resource *resource)
 {
@@ -2425,11 +2435,8 @@ static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
        if (resource->size != resource->size_new)
                nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
                                  resource->size_new, DEVLINK_ATTR_PAD);
-       if (resource->resource_ops && resource->resource_ops->occ_get)
-               if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
-                                     resource->resource_ops->occ_get(devlink),
-                                     DEVLINK_ATTR_PAD))
-                       goto nla_put_failure;
+       if (devlink_resource_occ_put(resource, skb))
+               goto nla_put_failure;
        if (devlink_resource_size_params_put(resource, skb))
                goto nla_put_failure;
        if (list_empty(&resource->resource_list))
@@ -3162,15 +3169,13 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
  *     @resource_id: resource's id
  *     @parent_reosurce_id: resource's parent id
  *     @size params: size parameters
- *     @resource_ops: resource ops
  */
 int devlink_resource_register(struct devlink *devlink,
                              const char *resource_name,
                              u64 resource_size,
                              u64 resource_id,
                              u64 parent_resource_id,
-                             const struct devlink_resource_size_params *size_params,
-                             const struct devlink_resource_ops *resource_ops)
+                             const struct devlink_resource_size_params *size_params)
 {
        struct devlink_resource *resource;
        struct list_head *resource_list;
@@ -3213,7 +3218,6 @@ int devlink_resource_register(struct devlink *devlink,
        resource->size = resource_size;
        resource->size_new = resource_size;
        resource->id = resource_id;
-       resource->resource_ops = resource_ops;
        resource->size_valid = true;
        memcpy(&resource->size_params, size_params,
               sizeof(resource->size_params));
@@ -3315,6 +3319,58 @@ out:
 }
 EXPORT_SYMBOL_GPL(devlink_dpipe_table_resource_set);
 
+/**
+ *     devlink_resource_occ_get_register - register occupancy getter
+ *
+ *     @devlink: devlink
+ *     @resource_id: resource id
+ *     @occ_get: occupancy getter callback
+ *     @occ_get_priv: occupancy getter callback priv
+ */
+void devlink_resource_occ_get_register(struct devlink *devlink,
+                                      u64 resource_id,
+                                      devlink_resource_occ_get_t *occ_get,
+                                      void *occ_get_priv)
+{
+       struct devlink_resource *resource;
+
+       mutex_lock(&devlink->lock);
+       resource = devlink_resource_find(devlink, NULL, resource_id);
+       if (WARN_ON(!resource))
+               goto out;
+       WARN_ON(resource->occ_get);
+
+       resource->occ_get = occ_get;
+       resource->occ_get_priv = occ_get_priv;
+out:
+       mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register);
+
+/**
+ *     devlink_resource_occ_get_unregister - unregister occupancy getter
+ *
+ *     @devlink: devlink
+ *     @resource_id: resource id
+ */
+void devlink_resource_occ_get_unregister(struct devlink *devlink,
+                                        u64 resource_id)
+{
+       struct devlink_resource *resource;
+
+       mutex_lock(&devlink->lock);
+       resource = devlink_resource_find(devlink, NULL, resource_id);
+       if (WARN_ON(!resource))
+               goto out;
+       WARN_ON(!resource->occ_get);
+
+       resource->occ_get = NULL;
+       resource->occ_get_priv = NULL;
+out:
+       mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister);
+
 static int __init devlink_module_init(void)
 {
        return genl_register_family(&devlink_nl_family);
index 1bca1e0fc8f70eb394f63c995e06bbc5a9261e51..345b51837ca80bb709bfffe04d58eedbba0b9907 100644 (file)
@@ -857,6 +857,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
        n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
        n->cloned = 1;
        n->nohdr = 0;
+       n->peeked = 0;
        n->destructor = NULL;
        C(tail);
        C(end);
index e65fcb45c3f6c1edc70fc9898ebe6404175b102f..b08feb219b44b67eadf408a33649d8c7ec9db2d0 100644 (file)
@@ -614,6 +614,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
        ireq = inet_rsk(req);
        sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
        sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
+       ireq->ir_mark = inet_request_mark(sk, skb);
        ireq->ireq_family = AF_INET;
        ireq->ir_iif = sk->sk_bound_dev_if;
 
index 5df7857fc0f3aeefb2ed0324d97d13cb68551383..6344f1b18a6a1b30cd2f3c559987a2c9e9546f81 100644 (file)
@@ -351,6 +351,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
        ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
        ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
        ireq->ireq_family = AF_INET6;
+       ireq->ir_mark = inet_request_mark(sk, skb);
 
        if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
            np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
index 70de7895e5b88d4d612ecd2a0af55b078f0d9a04..053731473c9932a8e32a12c43f743c772a661e85 100644 (file)
@@ -126,6 +126,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
        struct dsa_port *cpu_dp = dev->dsa_ptr;
        struct dsa_switch_tree *dst = cpu_dp->dst;
        struct dsa_switch *ds;
+       struct dsa_port *slave_port;
 
        if (device < 0 || device >= DSA_MAX_SWITCHES)
                return NULL;
@@ -137,7 +138,12 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
        if (port < 0 || port >= ds->num_ports)
                return NULL;
 
-       return ds->ports[port].slave;
+       slave_port = &ds->ports[port];
+
+       if (unlikely(slave_port->type != DSA_PORT_TYPE_USER))
+               return NULL;
+
+       return slave_port->slave;
 }
 
 /* port.c */
index be4c595edccb03c44c146a5302c93340c01d6583..bf6c2d4d4fdc9aa5f28f8533c6134128032b7331 100644 (file)
@@ -437,7 +437,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
        /*unsigned long now; */
        struct net *net = dev_net(dev);
 
-       rt = ip_route_output(net, sip, tip, 0, 0);
+       rt = ip_route_output(net, sip, tip, 0, l3mdev_master_ifindex_rcu(dev));
        if (IS_ERR(rt))
                return 1;
        if (rt->dst.dev != dev) {
index c3ea4906d237e17dd03553834dc9b6a9bb87a889..88c5069b5d20c74a63323a284f8346aa90efb865 100644 (file)
@@ -178,6 +178,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
                tw->tw_dport        = inet->inet_dport;
                tw->tw_family       = sk->sk_family;
                tw->tw_reuse        = sk->sk_reuse;
+               tw->tw_reuseport    = sk->sk_reuseport;
                tw->tw_hash         = sk->sk_hash;
                tw->tw_ipv6only     = 0;
                tw->tw_transparent  = inet->transparent;
index 1f04bd91fc2e999ddb82f4be92d39d229166b691..d757b9642d0d1c418bffad5bcd50e8e7bf336c66 100644 (file)
@@ -211,6 +211,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
                p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
                if (p) {
                        p->daddr = *daddr;
+                       p->dtime = (__u32)jiffies;
                        refcount_set(&p->refcnt, 2);
                        atomic_set(&p->rid, 0);
                        p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
index a8772a97822421116525561e483dce81cbb17597..9c169bb2444d5990c7562692ba1c92030898bca4 100644 (file)
@@ -781,8 +781,14 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
                    tunnel->encap.type == TUNNEL_ENCAP_NONE) {
                        dev->features |= NETIF_F_GSO_SOFTWARE;
                        dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+               } else {
+                       dev->features &= ~NETIF_F_GSO_SOFTWARE;
+                       dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
                }
                dev->features |= NETIF_F_LLTX;
+       } else {
+               dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
+               dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
        }
 }
 
index de6d94482fe7ef6be52eeea3291849cfb9d961f9..6b0e362cc99b5d3510ea7aa8db68dcb5c8eb675c 100644 (file)
@@ -253,13 +253,14 @@ static struct net_device *__ip_tunnel_create(struct net *net,
        struct net_device *dev;
        char name[IFNAMSIZ];
 
-       if (parms->name[0])
+       err = -E2BIG;
+       if (parms->name[0]) {
+               if (!dev_valid_name(parms->name))
+                       goto failed;
                strlcpy(name, parms->name, IFNAMSIZ);
-       else {
-               if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
-                       err = -E2BIG;
+       } else {
+               if (strlen(ops->kind) > (IFNAMSIZ - 3))
                        goto failed;
-               }
                strlcpy(name, ops->kind, IFNAMSIZ);
                strncat(name, "%d", 2);
        }
index 594a1c605c92286cb6dfdf7fc81431906a7a598c..ccb25d80f67956b12b3ffd57f7ac5ddc9b2cb6c0 100644 (file)
@@ -2296,13 +2296,14 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
                                        const struct sk_buff *skb)
 {
        __u8 tos = RT_FL_TOS(fl4);
-       struct fib_result res;
+       struct fib_result res = {
+               .type           = RTN_UNSPEC,
+               .fi             = NULL,
+               .table          = NULL,
+               .tclassid       = 0,
+       };
        struct rtable *rth;
 
-       res.tclassid    = 0;
-       res.fi          = NULL;
-       res.table       = NULL;
-
        fl4->flowi4_iif = LOOPBACK_IFINDEX;
        fl4->flowi4_tos = tos & IPTOS_RT_MASK;
        fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
index f8a103bdbd603be103bf6c18ed6a55703aab18df..69727bc168cb027009dac95431e40b71291697da 100644 (file)
@@ -335,11 +335,13 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
        if (t || !create)
                return t;
 
-       if (parms->name[0])
+       if (parms->name[0]) {
+               if (!dev_valid_name(parms->name))
+                       return NULL;
                strlcpy(name, parms->name, IFNAMSIZ);
-       else
+       } else {
                strcpy(name, "ip6gre%d");
-
+       }
        dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
                           ip6gre_tunnel_setup);
        if (!dev)
index b8ee50e94af384b1a9b404845291e6967517bf88..2e891d2c30efd3909e42810d3debba677776dd94 100644 (file)
@@ -375,6 +375,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
                                     struct sk_buff *skb)
 {
+       struct dst_entry *dst = skb_dst(skb);
+
+       __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
+       __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
+
        return dst_output(net, sk, skb);
 }
 
@@ -569,8 +574,6 @@ int ip6_forward(struct sk_buff *skb)
 
        hdr->hop_limit--;
 
-       __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
-       __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
                       net, NULL, skb, skb->dev, dst->dev,
                       ip6_forward_finish);
index df4c29f7d59f030729b1158b809a10f4115d4bbf..da66aaac51cecbf933827c8842e61a8cbb4d274f 100644 (file)
@@ -297,13 +297,16 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
        struct net_device *dev;
        struct ip6_tnl *t;
        char name[IFNAMSIZ];
-       int err = -ENOMEM;
+       int err = -E2BIG;
 
-       if (p->name[0])
+       if (p->name[0]) {
+               if (!dev_valid_name(p->name))
+                       goto failed;
                strlcpy(name, p->name, IFNAMSIZ);
-       else
+       } else {
                sprintf(name, "ip6tnl%%d");
-
+       }
+       err = -ENOMEM;
        dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
                           ip6_tnl_dev_setup);
        if (!dev)
index 6ebb2e8777f42054ca5ee8338aa560f5501d8262..c214ffec02f06f6dccfb9769fc8640e5e56da618 100644 (file)
@@ -212,10 +212,13 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p
        char name[IFNAMSIZ];
        int err;
 
-       if (p->name[0])
+       if (p->name[0]) {
+               if (!dev_valid_name(p->name))
+                       goto failed;
                strlcpy(name, p->name, IFNAMSIZ);
-       else
+       } else {
                sprintf(name, "ip6_vti%%d");
+       }
 
        dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, vti6_dev_setup);
        if (!dev)
index 1522bcfd253fcc0a01a4daa0ebcfb8bf154ab5dc..2afce37a71776f5ebd44d20e2d064909cb91f8dd 100644 (file)
@@ -250,11 +250,13 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
        if (!create)
                goto failed;
 
-       if (parms->name[0])
+       if (parms->name[0]) {
+               if (!dev_valid_name(parms->name))
+                       goto failed;
                strlcpy(name, parms->name, IFNAMSIZ);
-       else
+       } else {
                strcpy(name, "sit%d");
-
+       }
        dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
                           ipip6_tunnel_setup);
        if (!dev)
index 14b67dfacc4b48c5bba370da8090792bcd137de0..0fbd3ee26165dd1c9d8304a7d31cd31a3d247f41 100644 (file)
@@ -335,26 +335,6 @@ err_tlock:
 }
 EXPORT_SYMBOL_GPL(l2tp_session_register);
 
-/* Lookup a tunnel by id
- */
-struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id)
-{
-       struct l2tp_tunnel *tunnel;
-       struct l2tp_net *pn = l2tp_pernet(net);
-
-       rcu_read_lock_bh();
-       list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
-               if (tunnel->tunnel_id == tunnel_id) {
-                       rcu_read_unlock_bh();
-                       return tunnel;
-               }
-       }
-       rcu_read_unlock_bh();
-
-       return NULL;
-}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_find);
-
 struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth)
 {
        struct l2tp_net *pn = l2tp_pernet(net);
@@ -1436,74 +1416,11 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 {
        struct l2tp_tunnel *tunnel = NULL;
        int err;
-       struct socket *sock = NULL;
-       struct sock *sk = NULL;
-       struct l2tp_net *pn;
        enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
 
-       /* Get the tunnel socket from the fd, which was opened by
-        * the userspace L2TP daemon. If not specified, create a
-        * kernel socket.
-        */
-       if (fd < 0) {
-               err = l2tp_tunnel_sock_create(net, tunnel_id, peer_tunnel_id,
-                               cfg, &sock);
-               if (err < 0)
-                       goto err;
-       } else {
-               sock = sockfd_lookup(fd, &err);
-               if (!sock) {
-                       pr_err("tunl %u: sockfd_lookup(fd=%d) returned %d\n",
-                              tunnel_id, fd, err);
-                       err = -EBADF;
-                       goto err;
-               }
-
-               /* Reject namespace mismatches */
-               if (!net_eq(sock_net(sock->sk), net)) {
-                       pr_err("tunl %u: netns mismatch\n", tunnel_id);
-                       err = -EINVAL;
-                       goto err;
-               }
-       }
-
-       sk = sock->sk;
-
        if (cfg != NULL)
                encap = cfg->encap;
 
-       /* Quick sanity checks */
-       err = -EPROTONOSUPPORT;
-       if (sk->sk_type != SOCK_DGRAM) {
-               pr_debug("tunl %hu: fd %d wrong socket type\n",
-                        tunnel_id, fd);
-               goto err;
-       }
-       switch (encap) {
-       case L2TP_ENCAPTYPE_UDP:
-               if (sk->sk_protocol != IPPROTO_UDP) {
-                       pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
-                              tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
-                       goto err;
-               }
-               break;
-       case L2TP_ENCAPTYPE_IP:
-               if (sk->sk_protocol != IPPROTO_L2TP) {
-                       pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
-                              tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP);
-                       goto err;
-               }
-               break;
-       }
-
-       /* Check if this socket has already been prepped */
-       tunnel = l2tp_tunnel(sk);
-       if (tunnel != NULL) {
-               /* This socket has already been prepped */
-               err = -EBUSY;
-               goto err;
-       }
-
        tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL);
        if (tunnel == NULL) {
                err = -ENOMEM;
@@ -1520,72 +1437,126 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
        rwlock_init(&tunnel->hlist_lock);
        tunnel->acpt_newsess = true;
 
-       /* The net we belong to */
-       tunnel->l2tp_net = net;
-       pn = l2tp_pernet(net);
-
        if (cfg != NULL)
                tunnel->debug = cfg->debug;
 
-       /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
        tunnel->encap = encap;
-       if (encap == L2TP_ENCAPTYPE_UDP) {
-               struct udp_tunnel_sock_cfg udp_cfg = { };
-
-               udp_cfg.sk_user_data = tunnel;
-               udp_cfg.encap_type = UDP_ENCAP_L2TPINUDP;
-               udp_cfg.encap_rcv = l2tp_udp_encap_recv;
-               udp_cfg.encap_destroy = l2tp_udp_encap_destroy;
-
-               setup_udp_tunnel_sock(net, sock, &udp_cfg);
-       } else {
-               sk->sk_user_data = tunnel;
-       }
 
-       /* Bump the reference count. The tunnel context is deleted
-        * only when this drops to zero. A reference is also held on
-        * the tunnel socket to ensure that it is not released while
-        * the tunnel is extant. Must be done before sk_destruct is
-        * set.
-        */
        refcount_set(&tunnel->ref_count, 1);
-       sock_hold(sk);
-       tunnel->sock = sk;
        tunnel->fd = fd;
 
-       /* Hook on the tunnel socket destructor so that we can cleanup
-        * if the tunnel socket goes away.
-        */
-       tunnel->old_sk_destruct = sk->sk_destruct;
-       sk->sk_destruct = &l2tp_tunnel_destruct;
-       lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, "l2tp_sock");
-
-       sk->sk_allocation = GFP_ATOMIC;
-
        /* Init delete workqueue struct */
        INIT_WORK(&tunnel->del_work, l2tp_tunnel_del_work);
 
-       /* Add tunnel to our list */
        INIT_LIST_HEAD(&tunnel->list);
-       spin_lock_bh(&pn->l2tp_tunnel_list_lock);
-       list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
-       spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
 
        err = 0;
 err:
        if (tunnelp)
                *tunnelp = tunnel;
 
-       /* If tunnel's socket was created by the kernel, it doesn't
-        *  have a file.
-        */
-       if (sock && sock->file)
-               sockfd_put(sock);
-
        return err;
 }
 EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
 
+static int l2tp_validate_socket(const struct sock *sk, const struct net *net,
+                               enum l2tp_encap_type encap)
+{
+       if (!net_eq(sock_net(sk), net))
+               return -EINVAL;
+
+       if (sk->sk_type != SOCK_DGRAM)
+               return -EPROTONOSUPPORT;
+
+       if ((encap == L2TP_ENCAPTYPE_UDP && sk->sk_protocol != IPPROTO_UDP) ||
+           (encap == L2TP_ENCAPTYPE_IP && sk->sk_protocol != IPPROTO_L2TP))
+               return -EPROTONOSUPPORT;
+
+       if (sk->sk_user_data)
+               return -EBUSY;
+
+       return 0;
+}
+
+int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
+                        struct l2tp_tunnel_cfg *cfg)
+{
+       struct l2tp_tunnel *tunnel_walk;
+       struct l2tp_net *pn;
+       struct socket *sock;
+       struct sock *sk;
+       int ret;
+
+       if (tunnel->fd < 0) {
+               ret = l2tp_tunnel_sock_create(net, tunnel->tunnel_id,
+                                             tunnel->peer_tunnel_id, cfg,
+                                             &sock);
+               if (ret < 0)
+                       goto err;
+       } else {
+               sock = sockfd_lookup(tunnel->fd, &ret);
+               if (!sock)
+                       goto err;
+
+               ret = l2tp_validate_socket(sock->sk, net, tunnel->encap);
+               if (ret < 0)
+                       goto err_sock;
+       }
+
+       sk = sock->sk;
+
+       sock_hold(sk);
+       tunnel->sock = sk;
+       tunnel->l2tp_net = net;
+
+       pn = l2tp_pernet(net);
+
+       spin_lock_bh(&pn->l2tp_tunnel_list_lock);
+       list_for_each_entry(tunnel_walk, &pn->l2tp_tunnel_list, list) {
+               if (tunnel_walk->tunnel_id == tunnel->tunnel_id) {
+                       spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+
+                       ret = -EEXIST;
+                       goto err_sock;
+               }
+       }
+       list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
+       spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+
+       if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
+               struct udp_tunnel_sock_cfg udp_cfg = {
+                       .sk_user_data = tunnel,
+                       .encap_type = UDP_ENCAP_L2TPINUDP,
+                       .encap_rcv = l2tp_udp_encap_recv,
+                       .encap_destroy = l2tp_udp_encap_destroy,
+               };
+
+               setup_udp_tunnel_sock(net, sock, &udp_cfg);
+       } else {
+               sk->sk_user_data = tunnel;
+       }
+
+       tunnel->old_sk_destruct = sk->sk_destruct;
+       sk->sk_destruct = &l2tp_tunnel_destruct;
+       lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class,
+                                  "l2tp_sock");
+       sk->sk_allocation = GFP_ATOMIC;
+
+       if (tunnel->fd >= 0)
+               sockfd_put(sock);
+
+       return 0;
+
+err_sock:
+       if (tunnel->fd < 0)
+               sock_release(sock);
+       else
+               sockfd_put(sock);
+err:
+       return ret;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_register);
+
 /* This function is used by the netlink TUNNEL_DELETE command.
  */
 void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
index 2718d0b284d040810b3027ba62b911f77fc6f932..ba33cbec71eb2e5dca3cbeb4c3dd93239a12949b 100644 (file)
@@ -220,12 +220,14 @@ struct l2tp_session *l2tp_session_get(const struct net *net,
 struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth);
 struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
                                                const char *ifname);
-struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id);
 struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth);
 
 int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
                       u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
                       struct l2tp_tunnel **tunnelp);
+int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
+                        struct l2tp_tunnel_cfg *cfg);
+
 void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
 void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
 struct l2tp_session *l2tp_session_create(int priv_size,
index e7ea9c4b89ffc2ee942fb62d0100f38bb1b47f66..b05dbd9ffcb2b1fea951b724e51ad2a16a265102 100644 (file)
@@ -236,12 +236,6 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
        if (info->attrs[L2TP_ATTR_DEBUG])
                cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
 
-       tunnel = l2tp_tunnel_find(net, tunnel_id);
-       if (tunnel != NULL) {
-               ret = -EEXIST;
-               goto out;
-       }
-
        ret = -EINVAL;
        switch (cfg.encap) {
        case L2TP_ENCAPTYPE_UDP:
@@ -251,9 +245,19 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
                break;
        }
 
-       if (ret >= 0)
-               ret = l2tp_tunnel_notify(&l2tp_nl_family, info,
-                                        tunnel, L2TP_CMD_TUNNEL_CREATE);
+       if (ret < 0)
+               goto out;
+
+       l2tp_tunnel_inc_refcount(tunnel);
+       ret = l2tp_tunnel_register(tunnel, net, &cfg);
+       if (ret < 0) {
+               kfree(tunnel);
+               goto out;
+       }
+       ret = l2tp_tunnel_notify(&l2tp_nl_family, info, tunnel,
+                                L2TP_CMD_TUNNEL_CREATE);
+       l2tp_tunnel_dec_refcount(tunnel);
+
 out:
        return ret;
 }
index d6deca11da196549546f42d5d6322f92e9232275..896bbca9bdaab4af19b260129bff488c32f06660 100644 (file)
@@ -698,6 +698,15 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
                        error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, &tcfg, &tunnel);
                        if (error < 0)
                                goto end;
+
+                       l2tp_tunnel_inc_refcount(tunnel);
+                       error = l2tp_tunnel_register(tunnel, sock_net(sk),
+                                                    &tcfg);
+                       if (error < 0) {
+                               kfree(tunnel);
+                               goto end;
+                       }
+                       drop_tunnel = true;
                }
        } else {
                /* Error if we can't find the tunnel */
index fa556fdef57d8a684bd73de05821514482db2eea..55342c4d5cec6a999065aa1c9607bdf476c59d36 100644 (file)
@@ -1844,6 +1844,8 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 
        if (msg->msg_namelen) {
                err = -EINVAL;
+               if (msg->msg_namelen < sizeof(struct sockaddr_nl))
+                       goto out;
                if (addr->nl_family != AF_NETLINK)
                        goto out;
                dst_portid = addr->nl_pid;
index acad04243b41dfab790e0418a0e635bec840cbea..94c7f74909be32f36344ab0b82cfad06c16b041f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -1017,10 +1017,15 @@ static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn)
        if (conn->c_npaths == 0 && hash != 0) {
                rds_send_ping(conn, 0);
 
-               if (conn->c_npaths == 0) {
-                       wait_event_interruptible(conn->c_hs_waitq,
-                                                (conn->c_npaths != 0));
-               }
+               /* The underlying connection is not up yet.  Need to wait
+                * until it is up to be sure that the non-zero c_path can be
+                * used.  But if we are interrupted, we have to use the zero
+                * c_path in case the connection ends up being non-MP capable.
+                */
+               if (conn->c_npaths == 0)
+                       if (wait_event_interruptible(conn->c_hs_waitq,
+                                                    conn->c_npaths != 0))
+                               hash = 0;
                if (conn->c_npaths == 1)
                        hash = 0;
        }
index 9092531d45d840ec545f7011afb41776c07c57df..18089c02e55719d9818842f8cd3b35fa6cf94497 100644 (file)
@@ -248,10 +248,14 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
 
 static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg)
 {
-       if (cfg->is_ebpf)
-               bpf_prog_put(cfg->filter);
-       else
-               bpf_prog_destroy(cfg->filter);
+       struct bpf_prog *filter = cfg->filter;
+
+       if (filter) {
+               if (cfg->is_ebpf)
+                       bpf_prog_put(filter);
+               else
+                       bpf_prog_destroy(filter);
+       }
 
        kfree(cfg->bpf_ops);
        kfree(cfg->bpf_name);
index ed8b6a24b9e9325cc99f17e6ed00ead73fe0171e..bac47b5d18fdbd6bca9e3a39822db67be946b21b 100644 (file)
@@ -489,6 +489,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
                                RCU_INIT_POINTER(*kp, key->next);
 
                                tcf_unbind_filter(tp, &key->res);
+                               idr_remove(&ht->handle_idr, key->handle);
                                tcf_exts_get_net(&key->exts);
                                call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
                                return 0;
index 6dd976c8ab611943fbbcfc3800d330fa68c801b0..31083b5035ecf6412abced7a939d8b1b8f53a302 100644 (file)
@@ -757,8 +757,10 @@ static int sctp_v6_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr)
                        sctp_v6_map_v4(addr);
        }
 
-       if (addr->sa.sa_family == AF_INET)
+       if (addr->sa.sa_family == AF_INET) {
+               memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero));
                return sizeof(struct sockaddr_in);
+       }
        return sizeof(struct sockaddr_in6);
 }
 
index 2a2e094560dedafa043b7374941e82559e47a739..80835ac26d2c3ce6559f75aaaa0b315fb77d9adf 100644 (file)
@@ -357,11 +357,14 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
        if (!opt->pf->af_supported(addr->sa.sa_family, opt))
                return NULL;
 
-       /* V4 mapped address are really of AF_INET family */
-       if (addr->sa.sa_family == AF_INET6 &&
-           ipv6_addr_v4mapped(&addr->v6.sin6_addr) &&
-           !opt->pf->af_supported(AF_INET, opt))
-               return NULL;
+       if (addr->sa.sa_family == AF_INET6) {
+               if (len < SIN6_LEN_RFC2133)
+                       return NULL;
+               /* V4 mapped address are really of AF_INET family */
+               if (ipv6_addr_v4mapped(&addr->v6.sin6_addr) &&
+                   !opt->pf->af_supported(AF_INET, opt))
+                       return NULL;
+       }
 
        /* If we get this far, af is valid. */
        af = sctp_get_af_specific(addr->sa.sa_family);
index 46d9cd62f781a91cc61c4320a636c1bef953ae6b..aaabb0b776dda6041defb902804e08388e39f9c5 100644 (file)
@@ -59,7 +59,7 @@ static int __tipc_add_sock_diag(struct sk_buff *skb,
        if (!nlh)
                return -EMSGSIZE;
 
-       err = tipc_sk_fill_sock_diag(skb, tsk, req->tidiag_states,
+       err = tipc_sk_fill_sock_diag(skb, cb, tsk, req->tidiag_states,
                                     __tipc_diag_gen_cookie);
        if (err)
                return err;
index cee6674a3bf4cf83f19d9b4b80d704d58b0f199b..1fd1c8b5ce0346cff07fa0a2218701000ced77f0 100644 (file)
@@ -3257,8 +3257,8 @@ out:
 }
 EXPORT_SYMBOL(tipc_nl_sk_walk);
 
-int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct tipc_sock *tsk,
-                          u32 sk_filter_state,
+int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb,
+                          struct tipc_sock *tsk, u32 sk_filter_state,
                           u64 (*tipc_diag_gen_cookie)(struct sock *sk))
 {
        struct sock *sk = &tsk->sk;
@@ -3280,7 +3280,7 @@ int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct tipc_sock *tsk,
            nla_put_u32(skb, TIPC_NLA_SOCK_TIPC_STATE, (u32)sk->sk_state) ||
            nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) ||
            nla_put_u32(skb, TIPC_NLA_SOCK_UID,
-                       from_kuid_munged(sk_user_ns(NETLINK_CB(skb).sk),
+                       from_kuid_munged(sk_user_ns(NETLINK_CB(cb->skb).sk),
                                         sock_i_uid(sk))) ||
            nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE,
                              tipc_diag_gen_cookie(sk),
index aae3fd4cd06c4b44b94194327faac1aaaf183ddc..aff9b2ae5a1f448d64d72927686d28146dfd5d7d 100644 (file)
@@ -61,8 +61,8 @@ int tipc_sk_rht_init(struct net *net);
 void tipc_sk_rht_destroy(struct net *net);
 int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb);
-int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct tipc_sock *tsk,
-                          u32 sk_filter_state,
+int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb,
+                          struct tipc_sock *tsk, u32 sk_filter_state,
                           u64 (*tipc_diag_gen_cookie)(struct sock *sk));
 int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
                    int (*skb_handler)(struct sk_buff *skb,
index f524f551718eb2a5ffa4e083fd2c91d2fb9a5dbe..3db002b9e1d39b4987d87dadc26c3673287d6508 100644 (file)
@@ -62,6 +62,16 @@ config SAMPLE_KDB
          Build an example of how to dynamically add the hello
          command to the kdb shell.
 
+config SAMPLE_QMI_CLIENT
+       tristate "Build qmi client sample -- loadable modules only"
+       depends on m
+       depends on ARCH_QCOM
+       depends on NET
+       select QCOM_QMI_HELPERS
+       help
+         Build a QMI client sample driver, which demonstrates how to
+         communicate with a remote QRTR service, using QMI encoded messages.
+
 config SAMPLE_RPMSG_CLIENT
        tristate "Build rpmsg client sample -- loadable modules only"
        depends on RPMSG && m
index 70cf3758dcf2f91bddd2123ca029ee89e9140c3b..bd601c038b86f67e47fff54ae2dfe4eeab5fc3a6 100644 (file)
@@ -3,4 +3,4 @@
 obj-$(CONFIG_SAMPLES)  += kobject/ kprobes/ trace_events/ livepatch/ \
                           hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \
                           configfs/ connector/ v4l/ trace_printk/ \
-                          vfio-mdev/ statx/
+                          vfio-mdev/ statx/ qmi/
diff --git a/samples/qmi/Makefile b/samples/qmi/Makefile
new file mode 100644 (file)
index 0000000..2b111d2
--- /dev/null
@@ -0,0 +1 @@
+obj-$(CONFIG_SAMPLE_QMI_CLIENT) += qmi_sample_client.o
diff --git a/samples/qmi/qmi_sample_client.c b/samples/qmi/qmi_sample_client.c
new file mode 100644 (file)
index 0000000..c9e7276
--- /dev/null
@@ -0,0 +1,622 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Sample in-kernel QMI client driver
+ *
+ * Copyright (c) 2013-2014, The Linux Foundation. All rights reserved.
+ * Copyright (C) 2017 Linaro Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/qrtr.h>
+#include <linux/net.h>
+#include <linux/completion.h>
+#include <linux/idr.h>
+#include <linux/string.h>
+#include <net/sock.h>
+#include <linux/soc/qcom/qmi.h>
+
+#define PING_REQ1_TLV_TYPE             0x1
+#define PING_RESP1_TLV_TYPE            0x2
+#define PING_OPT1_TLV_TYPE             0x10
+#define PING_OPT2_TLV_TYPE             0x11
+
+#define DATA_REQ1_TLV_TYPE             0x1
+#define DATA_RESP1_TLV_TYPE            0x2
+#define DATA_OPT1_TLV_TYPE             0x10
+#define DATA_OPT2_TLV_TYPE             0x11
+
+#define TEST_MED_DATA_SIZE_V01         8192
+#define TEST_MAX_NAME_SIZE_V01         255
+
+#define TEST_PING_REQ_MSG_ID_V01       0x20
+#define TEST_DATA_REQ_MSG_ID_V01       0x21
+
+#define TEST_PING_REQ_MAX_MSG_LEN_V01  266
+#define TEST_DATA_REQ_MAX_MSG_LEN_V01  8456
+
+struct test_name_type_v01 {
+       u32 name_len;
+       char name[TEST_MAX_NAME_SIZE_V01];
+};
+
+static struct qmi_elem_info test_name_type_v01_ei[] = {
+       {
+               .data_type      = QMI_DATA_LEN,
+               .elem_len       = 1,
+               .elem_size      = sizeof(u8),
+               .array_type     = NO_ARRAY,
+               .tlv_type       = QMI_COMMON_TLV_TYPE,
+               .offset         = offsetof(struct test_name_type_v01,
+                                          name_len),
+       },
+       {
+               .data_type      = QMI_UNSIGNED_1_BYTE,
+               .elem_len       = TEST_MAX_NAME_SIZE_V01,
+               .elem_size      = sizeof(char),
+               .array_type     = VAR_LEN_ARRAY,
+               .tlv_type       = QMI_COMMON_TLV_TYPE,
+               .offset         = offsetof(struct test_name_type_v01,
+                                          name),
+       },
+       {}
+};
+
+struct test_ping_req_msg_v01 {
+       char ping[4];
+
+       u8 client_name_valid;
+       struct test_name_type_v01 client_name;
+};
+
+static struct qmi_elem_info test_ping_req_msg_v01_ei[] = {
+       {
+               .data_type      = QMI_UNSIGNED_1_BYTE,
+               .elem_len       = 4,
+               .elem_size      = sizeof(char),
+               .array_type     = STATIC_ARRAY,
+               .tlv_type       = PING_REQ1_TLV_TYPE,
+               .offset         = offsetof(struct test_ping_req_msg_v01,
+                                          ping),
+       },
+       {
+               .data_type      = QMI_OPT_FLAG,
+               .elem_len       = 1,
+               .elem_size      = sizeof(u8),
+               .array_type     = NO_ARRAY,
+               .tlv_type       = PING_OPT1_TLV_TYPE,
+               .offset         = offsetof(struct test_ping_req_msg_v01,
+                                          client_name_valid),
+       },
+       {
+               .data_type      = QMI_STRUCT,
+               .elem_len       = 1,
+               .elem_size      = sizeof(struct test_name_type_v01),
+               .array_type     = NO_ARRAY,
+               .tlv_type       = PING_OPT1_TLV_TYPE,
+               .offset         = offsetof(struct test_ping_req_msg_v01,
+                                          client_name),
+               .ei_array       = test_name_type_v01_ei,
+       },
+       {}
+};
+
+struct test_ping_resp_msg_v01 {
+       struct qmi_response_type_v01 resp;
+
+       u8 pong_valid;
+       char pong[4];
+
+       u8 service_name_valid;
+       struct test_name_type_v01 service_name;
+};
+
+static struct qmi_elem_info test_ping_resp_msg_v01_ei[] = {
+       {
+               .data_type      = QMI_STRUCT,
+               .elem_len       = 1,
+               .elem_size      = sizeof(struct qmi_response_type_v01),
+               .array_type     = NO_ARRAY,
+               .tlv_type       = PING_RESP1_TLV_TYPE,
+               .offset         = offsetof(struct test_ping_resp_msg_v01,
+                                          resp),
+               .ei_array       = qmi_response_type_v01_ei,
+       },
+       {
+               .data_type      = QMI_OPT_FLAG,
+               .elem_len       = 1,
+               .elem_size      = sizeof(u8),
+               .array_type     = NO_ARRAY,
+               .tlv_type       = PING_OPT1_TLV_TYPE,
+               .offset         = offsetof(struct test_ping_resp_msg_v01,
+                                          pong_valid),
+       },
+       {
+               .data_type      = QMI_UNSIGNED_1_BYTE,
+               .elem_len       = 4,
+               .elem_size      = sizeof(char),
+               .array_type     = STATIC_ARRAY,
+               .tlv_type       = PING_OPT1_TLV_TYPE,
+               .offset         = offsetof(struct test_ping_resp_msg_v01,
+                                          pong),
+       },
+       {
+               .data_type      = QMI_OPT_FLAG,
+               .elem_len       = 1,
+               .elem_size      = sizeof(u8),
+               .array_type     = NO_ARRAY,
+               .tlv_type       = PING_OPT2_TLV_TYPE,
+               .offset         = offsetof(struct test_ping_resp_msg_v01,
+                                          service_name_valid),
+       },
+       {
+               .data_type      = QMI_STRUCT,
+               .elem_len       = 1,
+               .elem_size      = sizeof(struct test_name_type_v01),
+               .array_type     = NO_ARRAY,
+               .tlv_type       = PING_OPT2_TLV_TYPE,
+               .offset         = offsetof(struct test_ping_resp_msg_v01,
+                                          service_name),
+               .ei_array       = test_name_type_v01_ei,
+       },
+       {}
+};
+
+struct test_data_req_msg_v01 {
+       u32 data_len;
+       u8 data[TEST_MED_DATA_SIZE_V01];
+
+       u8 client_name_valid;
+       struct test_name_type_v01 client_name;
+};
+
+static struct qmi_elem_info test_data_req_msg_v01_ei[] = {
+       {
+               .data_type      = QMI_DATA_LEN,
+               .elem_len       = 1,
+               .elem_size      = sizeof(u32),
+               .array_type     = NO_ARRAY,
+               .tlv_type       = DATA_REQ1_TLV_TYPE,
+               .offset         = offsetof(struct test_data_req_msg_v01,
+                                          data_len),
+       },
+       {
+               .data_type      = QMI_UNSIGNED_1_BYTE,
+               .elem_len       = TEST_MED_DATA_SIZE_V01,
+               .elem_size      = sizeof(u8),
+               .array_type     = VAR_LEN_ARRAY,
+               .tlv_type       = DATA_REQ1_TLV_TYPE,
+               .offset         = offsetof(struct test_data_req_msg_v01,
+                                          data),
+       },
+       {
+               .data_type      = QMI_OPT_FLAG,
+               .elem_len       = 1,
+               .elem_size      = sizeof(u8),
+               .array_type     = NO_ARRAY,
+               .tlv_type       = DATA_OPT1_TLV_TYPE,
+               .offset         = offsetof(struct test_data_req_msg_v01,
+                                          client_name_valid),
+       },
+       {
+               .data_type      = QMI_STRUCT,
+               .elem_len       = 1,
+               .elem_size      = sizeof(struct test_name_type_v01),
+               .array_type     = NO_ARRAY,
+               .tlv_type       = DATA_OPT1_TLV_TYPE,
+               .offset         = offsetof(struct test_data_req_msg_v01,
+                                          client_name),
+               .ei_array       = test_name_type_v01_ei,
+       },
+       {}
+};
+
+struct test_data_resp_msg_v01 {
+       struct qmi_response_type_v01 resp;
+
+       u8 data_valid;
+       u32 data_len;
+       u8 data[TEST_MED_DATA_SIZE_V01];
+
+       u8 service_name_valid;
+       struct test_name_type_v01 service_name;
+};
+
+static struct qmi_elem_info test_data_resp_msg_v01_ei[] = {
+       {
+               .data_type      = QMI_STRUCT,
+               .elem_len       = 1,
+               .elem_size      = sizeof(struct qmi_response_type_v01),
+               .array_type     = NO_ARRAY,
+               .tlv_type       = DATA_RESP1_TLV_TYPE,
+               .offset         = offsetof(struct test_data_resp_msg_v01,
+                                          resp),
+               .ei_array       = qmi_response_type_v01_ei,
+       },
+       {
+               .data_type      = QMI_OPT_FLAG,
+               .elem_len       = 1,
+               .elem_size      = sizeof(u8),
+               .array_type     = NO_ARRAY,
+               .tlv_type       = DATA_OPT1_TLV_TYPE,
+               .offset         = offsetof(struct test_data_resp_msg_v01,
+                                          data_valid),
+       },
+       {
+               .data_type      = QMI_DATA_LEN,
+               .elem_len       = 1,
+               .elem_size      = sizeof(u32),
+               .array_type     = NO_ARRAY,
+               .tlv_type       = DATA_OPT1_TLV_TYPE,
+               .offset         = offsetof(struct test_data_resp_msg_v01,
+                                          data_len),
+       },
+       {
+               .data_type      = QMI_UNSIGNED_1_BYTE,
+               .elem_len       = TEST_MED_DATA_SIZE_V01,
+               .elem_size      = sizeof(u8),
+               .array_type     = VAR_LEN_ARRAY,
+               .tlv_type       = DATA_OPT1_TLV_TYPE,
+               .offset         = offsetof(struct test_data_resp_msg_v01,
+                                          data),
+       },
+       {
+               .data_type      = QMI_OPT_FLAG,
+               .elem_len       = 1,
+               .elem_size      = sizeof(u8),
+               .array_type     = NO_ARRAY,
+               .tlv_type       = DATA_OPT2_TLV_TYPE,
+               .offset         = offsetof(struct test_data_resp_msg_v01,
+                                          service_name_valid),
+       },
+       {
+               .data_type      = QMI_STRUCT,
+               .elem_len       = 1,
+               .elem_size      = sizeof(struct test_name_type_v01),
+               .array_type     = NO_ARRAY,
+               .tlv_type       = DATA_OPT2_TLV_TYPE,
+               .offset         = offsetof(struct test_data_resp_msg_v01,
+                                          service_name),
+               .ei_array       = test_name_type_v01_ei,
+       },
+       {}
+};
+
+/*
+ * ping_write() - ping_pong debugfs file write handler
+ * @file:      debugfs file context
+ * @user_buf:  reference to the user data (ignored)
+ * @count:     number of bytes in @user_buf
+ * @ppos:      offset in @file to write
+ *
+ * This function allows user space to send out a ping_pong QMI encoded message
+ * to the associated remote test service and will return with the result of the
+ * transaction. It serves as an example of how to provide a custom response
+ * handler.
+ *
+ * Return: @count, or negative errno on failure.
+ */
+static ssize_t ping_write(struct file *file, const char __user *user_buf,
+                         size_t count, loff_t *ppos)
+{
+       struct qmi_handle *qmi = file->private_data;
+       struct test_ping_req_msg_v01 req = {};
+       struct qmi_txn txn;
+       int ret;
+
+       memcpy(req.ping, "ping", sizeof(req.ping));
+
+       ret = qmi_txn_init(qmi, &txn, NULL, NULL);
+       if (ret < 0)
+               return ret;
+
+       ret = qmi_send_request(qmi, NULL, &txn,
+                              TEST_PING_REQ_MSG_ID_V01,
+                              TEST_PING_REQ_MAX_MSG_LEN_V01,
+                              test_ping_req_msg_v01_ei, &req);
+       if (ret < 0) {
+               qmi_txn_cancel(&txn);
+               return ret;
+       }
+
+       ret = qmi_txn_wait(&txn, 5 * HZ);
+       if (ret < 0)
+               count = ret;
+
+       return count;
+}
+
+static const struct file_operations ping_fops = {
+       .open = simple_open,
+       .write = ping_write,
+};
+
+static void ping_pong_cb(struct qmi_handle *qmi, struct sockaddr_qrtr *sq,
+                        struct qmi_txn *txn, const void *data)
+{
+       const struct test_ping_resp_msg_v01 *resp = data;
+
+       if (!txn) {
+               pr_err("spurious ping response\n");
+               return;
+       }
+
+       if (resp->resp.result == QMI_RESULT_FAILURE_V01)
+               txn->result = -ENXIO;
+       else if (!resp->pong_valid || memcmp(resp->pong, "pong", 4))
+               txn->result = -EINVAL;
+
+       complete(&txn->completion);
+}
+
+/*
+ * data_write() - data debugfs file write handler
+ * @file:      debugfs file context
+ * @user_buf:  reference to the user data
+ * @count:     number of bytes in @user_buf
+ * @ppos:      offset in @file to write
+ *
+ * This function allows user space to send out a data QMI encoded message to
+ * the associated remote test service and will return with the result of the
+ * transaction. It serves as an example of how to have the QMI helpers decode a
+ * transaction response into a provided object automatically.
+ *
+ * Return: @count, or negative errno on failure.
+ */
+static ssize_t data_write(struct file *file, const char __user *user_buf,
+                         size_t count, loff_t *ppos)
+
+{
+       struct qmi_handle *qmi = file->private_data;
+       struct test_data_resp_msg_v01 *resp;
+       struct test_data_req_msg_v01 *req;
+       struct qmi_txn txn;
+       int ret;
+
+       req = kzalloc(sizeof(*req), GFP_KERNEL);
+       if (!req)
+               return -ENOMEM;
+
+       resp = kzalloc(sizeof(*resp), GFP_KERNEL);
+       if (!resp) {
+               kfree(req);
+               return -ENOMEM;
+       }
+
+       req->data_len = min_t(size_t, sizeof(req->data), count);
+       if (copy_from_user(req->data, user_buf, req->data_len)) {
+               ret = -EFAULT;
+               goto out;
+       }
+
+       ret = qmi_txn_init(qmi, &txn, test_data_resp_msg_v01_ei, resp);
+       if (ret < 0)
+               goto out;
+
+       ret = qmi_send_request(qmi, NULL, &txn,
+                              TEST_DATA_REQ_MSG_ID_V01,
+                              TEST_DATA_REQ_MAX_MSG_LEN_V01,
+                              test_data_req_msg_v01_ei, req);
+       if (ret < 0) {
+               qmi_txn_cancel(&txn);
+               goto out;
+       }
+
+       ret = qmi_txn_wait(&txn, 5 * HZ);
+       if (ret < 0) {
+               goto out;
+       } else if (!resp->data_valid ||
+                  resp->data_len != req->data_len ||
+                  memcmp(resp->data, req->data, req->data_len)) {
+               pr_err("response data doesn't match expectation\n");
+               ret = -EINVAL;
+               goto out;
+       }
+
+       ret = count;
+
+out:
+       kfree(resp);
+       kfree(req);
+
+       return ret;
+}
+
+static const struct file_operations data_fops = {
+       .open = simple_open,
+       .write = data_write,
+};
+
+static struct qmi_msg_handler qmi_sample_handlers[] = {
+	{
+		.type = QMI_RESPONSE,
+		.msg_id = TEST_PING_REQ_MSG_ID_V01,
+		.ei = test_ping_resp_msg_v01_ei,
+		/* Must size the decode buffer for the *response* struct that
+		 * ping_pong_cb() reads; sizing it for the request struct
+		 * undersizes the allocation used during QMI decode.
+		 */
+		.decoded_size = sizeof(struct test_ping_resp_msg_v01),
+		.fn = ping_pong_cb
+	},
+	{}
+};
+
+struct qmi_sample {
+       struct qmi_handle qmi;
+
+       struct dentry *de_dir;
+       struct dentry *de_data;
+       struct dentry *de_ping;
+};
+
+static struct dentry *qmi_debug_dir;
+
+static int qmi_sample_probe(struct platform_device *pdev)
+{
+       struct sockaddr_qrtr *sq;
+       struct qmi_sample *sample;
+       char path[20];
+       int ret;
+
+       sample = devm_kzalloc(&pdev->dev, sizeof(*sample), GFP_KERNEL);
+       if (!sample)
+               return -ENOMEM;
+
+       ret = qmi_handle_init(&sample->qmi, TEST_DATA_REQ_MAX_MSG_LEN_V01,
+                             NULL,
+                             qmi_sample_handlers);
+       if (ret < 0)
+               return ret;
+
+       sq = dev_get_platdata(&pdev->dev);
+       ret = kernel_connect(sample->qmi.sock, (struct sockaddr *)sq,
+                            sizeof(*sq), 0);
+       if (ret < 0) {
+               pr_err("failed to connect to remote service port\n");
+               goto err_release_qmi_handle;
+       }
+
+       snprintf(path, sizeof(path), "%d:%d", sq->sq_node, sq->sq_port);
+
+       sample->de_dir = debugfs_create_dir(path, qmi_debug_dir);
+       if (IS_ERR(sample->de_dir)) {
+               ret = PTR_ERR(sample->de_dir);
+               goto err_release_qmi_handle;
+       }
+
+       sample->de_data = debugfs_create_file("data", 0600, sample->de_dir,
+                                             sample, &data_fops);
+       if (IS_ERR(sample->de_data)) {
+               ret = PTR_ERR(sample->de_data);
+               goto err_remove_de_dir;
+       }
+
+       sample->de_ping = debugfs_create_file("ping", 0600, sample->de_dir,
+                                             sample, &ping_fops);
+       if (IS_ERR(sample->de_ping)) {
+               ret = PTR_ERR(sample->de_ping);
+               goto err_remove_de_data;
+       }
+
+       platform_set_drvdata(pdev, sample);
+
+       return 0;
+
+err_remove_de_data:
+       debugfs_remove(sample->de_data);
+err_remove_de_dir:
+       debugfs_remove(sample->de_dir);
+err_release_qmi_handle:
+       qmi_handle_release(&sample->qmi);
+
+       return ret;
+}
+
+static int qmi_sample_remove(struct platform_device *pdev)
+{
+       struct qmi_sample *sample = platform_get_drvdata(pdev);
+
+       debugfs_remove(sample->de_ping);
+       debugfs_remove(sample->de_data);
+       debugfs_remove(sample->de_dir);
+
+       qmi_handle_release(&sample->qmi);
+
+       return 0;
+}
+
+static struct platform_driver qmi_sample_driver = {
+       .probe = qmi_sample_probe,
+       .remove = qmi_sample_remove,
+       .driver = {
+               .name = "qmi_sample_client",
+       },
+};
+
+static int qmi_sample_new_server(struct qmi_handle *qmi,
+                                struct qmi_service *service)
+{
+       struct platform_device *pdev;
+       struct sockaddr_qrtr sq = { AF_QIPCRTR, service->node, service->port };
+       int ret;
+
+       pdev = platform_device_alloc("qmi_sample_client", PLATFORM_DEVID_AUTO);
+       if (!pdev)
+               return -ENOMEM;
+
+       ret = platform_device_add_data(pdev, &sq, sizeof(sq));
+       if (ret)
+               goto err_put_device;
+
+       ret = platform_device_add(pdev);
+       if (ret)
+               goto err_put_device;
+
+       service->priv = pdev;
+
+       return 0;
+
+err_put_device:
+       platform_device_put(pdev);
+
+       return ret;
+}
+
+static void qmi_sample_del_server(struct qmi_handle *qmi,
+                                 struct qmi_service *service)
+{
+       struct platform_device *pdev = service->priv;
+
+       platform_device_unregister(pdev);
+}
+
+static struct qmi_handle lookup_client;
+
+static struct qmi_ops lookup_ops = {
+       .new_server = qmi_sample_new_server,
+       .del_server = qmi_sample_del_server,
+};
+
+static int qmi_sample_init(void)
+{
+       int ret;
+
+       qmi_debug_dir = debugfs_create_dir("qmi_sample", NULL);
+       if (IS_ERR(qmi_debug_dir)) {
+               pr_err("failed to create qmi_sample dir\n");
+               return PTR_ERR(qmi_debug_dir);
+       }
+
+       ret = platform_driver_register(&qmi_sample_driver);
+       if (ret)
+               goto err_remove_debug_dir;
+
+       ret = qmi_handle_init(&lookup_client, 0, &lookup_ops, NULL);
+       if (ret < 0)
+               goto err_unregister_driver;
+
+       qmi_add_lookup(&lookup_client, 15, 0, 0);
+
+       return 0;
+
+err_unregister_driver:
+       platform_driver_unregister(&qmi_sample_driver);
+err_remove_debug_dir:
+       debugfs_remove(qmi_debug_dir);
+
+       return ret;
+}
+
+static void qmi_sample_exit(void)
+{
+       qmi_handle_release(&lookup_client);
+
+       platform_driver_unregister(&qmi_sample_driver);
+
+       debugfs_remove(qmi_debug_dir);
+}
+
+module_init(qmi_sample_init);
+module_exit(qmi_sample_exit);
+
+MODULE_DESCRIPTION("Sample QMI client driver");
+MODULE_LICENSE("GPL v2");
index 764ffd1bb1c50dffd88c91eed4416e6071e469df..e16d6713f2368473d96eaec321fda1685cb3cac5 100755 (executable)
@@ -791,7 +791,8 @@ our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant|$String)};
 our $declaration_macros = qr{(?x:
        (?:$Storage\s+)?(?:[A-Z_][A-Z0-9]*_){0,2}(?:DEFINE|DECLARE)(?:_[A-Z0-9]+){1,6}\s*\(|
        (?:$Storage\s+)?[HLP]?LIST_HEAD\s*\(|
-       (?:$Storage\s+)?${Type}\s+uninitialized_var\s*\(
+       (?:$Storage\s+)?${Type}\s+uninitialized_var\s*\(|
+       (?:SKCIPHER_REQUEST|SHASH_DESC|AHASH_REQUEST)_ON_STACK\s*\(
 )};
 
 sub deparenthesize {
@@ -1075,7 +1076,7 @@ sub parse_email {
        } elsif ($formatted_email =~ /(\S+\@\S+)(.*)$/) {
                $address = $1;
                $comment = $2 if defined $2;
-               $formatted_email =~ s/$address.*$//;
+               $formatted_email =~ s/\Q$address\E.*$//;
                $name = $formatted_email;
                $name = trim($name);
                $name =~ s/^\"|\"$//g;
@@ -1217,7 +1218,7 @@ sub sanitise_line {
        for ($off = 1; $off < length($line); $off++) {
                $c = substr($line, $off, 1);
 
-               # Comments we are wacking completly including the begin
+               # Comments we are whacking completely including the begin
                # and end, all to $;.
                if ($sanitise_quote eq '' && substr($line, $off, 2) eq '/*') {
                        $sanitise_quote = '*/';
@@ -1297,6 +1298,7 @@ sub sanitise_line {
 sub get_quoted_string {
        my ($line, $rawline) = @_;
 
+       return "" if (!defined($line) || !defined($rawline));
        return "" if ($line !~ m/($String)/g);
        return substr($rawline, $-[0], $+[0] - $-[0]);
 }
@@ -1644,6 +1646,28 @@ sub raw_line {
        return $line;
 }
 
+sub get_stat_real {
+       my ($linenr, $lc) = @_;
+
+       my $stat_real = raw_line($linenr, 0);
+       for (my $count = $linenr + 1; $count <= $lc; $count++) {
+               $stat_real = $stat_real . "\n" . raw_line($count, 0);
+       }
+
+       return $stat_real;
+}
+
+sub get_stat_here {
+       my ($linenr, $cnt, $here) = @_;
+
+       my $herectx = $here . "\n";
+       for (my $n = 0; $n < $cnt; $n++) {
+               $herectx .= raw_line($linenr, $n) . "\n";
+       }
+
+       return $herectx;
+}
+
 sub cat_vet {
        my ($vet) = @_;
        my ($res, $coded);
@@ -2257,6 +2281,8 @@ sub process {
 
        my $camelcase_file_seeded = 0;
 
+       my $checklicenseline = 1;
+
        sanitise_line_reset();
        my $line;
        foreach my $rawline (@rawlines) {
@@ -2448,6 +2474,7 @@ sub process {
                        } else {
                                $check = $check_orig;
                        }
+                       $checklicenseline = 1;
                        next;
                }
 
@@ -2911,6 +2938,30 @@ sub process {
                        }
                }
 
+# check for using SPDX license tag at beginning of files
+               if ($realline == $checklicenseline) {
+                       if ($rawline =~ /^[ \+]\s*\#\!\s*\//) {
+                               $checklicenseline = 2;
+                       } elsif ($rawline =~ /^\+/) {
+                               my $comment = "";
+                               if ($realfile =~ /\.(h|s|S)$/) {
+                                       $comment = '/*';
+                               } elsif ($realfile =~ /\.(c|dts|dtsi)$/) {
+                                       $comment = '//';
+                               } elsif (($checklicenseline == 2) || $realfile =~ /\.(sh|pl|py|awk|tc)$/) {
+                                       $comment = '#';
+                               } elsif ($realfile =~ /\.rst$/) {
+                                       $comment = '..';
+                               }
+
+                               if ($comment !~ /^$/ &&
+                                   $rawline !~ /^\+\Q$comment\E SPDX-License-Identifier: /) {
+                                       WARN("SPDX_LICENSE_TAG",
+                                            "Missing or malformed SPDX-License-Identifier tag in line $checklicenseline\n" . $herecurr);
+                               }
+                       }
+               }
+
 # check we are in a valid source file if not then ignore this hunk
                next if ($realfile !~ /\.(h|c|s|S|sh|dtsi|dts)$/);
 
@@ -3011,6 +3062,12 @@ sub process {
                        }
                }
 
+# check for assignments on the start of a line
+               if ($sline =~ /^\+\s+($Assignment)[^=]/) {
+                       CHK("ASSIGNMENT_CONTINUATIONS",
+                           "Assignment operator '$1' should be on the previous line\n" . $hereprev);
+               }
+
 # check for && or || at the start of a line
                if ($rawline =~ /^\+\s*(&&|\|\|)/) {
                        CHK("LOGICAL_CONTINUATIONS",
@@ -4032,7 +4089,7 @@ sub process {
                        my ($where, $prefix) = ($-[1], $1);
                        if ($prefix !~ /$Type\s+$/ &&
                            ($where != 0 || $prefix !~ /^.\s+$/) &&
-                           $prefix !~ /[{,]\s+$/) {
+                           $prefix !~ /[{,:]\s+$/) {
                                if (ERROR("BRACKET_SPACE",
                                          "space prohibited before open square bracket '['\n" . $herecurr) &&
                                    $fix) {
@@ -4928,12 +4985,8 @@ sub process {
                        #print "REST<$rest> dstat<$dstat> ctx<$ctx>\n";
 
                        $ctx =~ s/\n*$//;
-                       my $herectx = $here . "\n";
                        my $stmt_cnt = statement_rawlines($ctx);
-
-                       for (my $n = 0; $n < $stmt_cnt; $n++) {
-                               $herectx .= raw_line($linenr, $n) . "\n";
-                       }
+                       my $herectx = get_stat_here($linenr, $stmt_cnt, $here);
 
                        if ($dstat ne '' &&
                            $dstat !~ /^(?:$Ident|-?$Constant),$/ &&                    # 10, // foo(),
@@ -5005,12 +5058,9 @@ sub process {
 # check for macros with flow control, but without ## concatenation
 # ## concatenation is commonly a macro that defines a function so ignore those
                        if ($has_flow_statement && !$has_arg_concat) {
-                               my $herectx = $here . "\n";
                                my $cnt = statement_rawlines($ctx);
+                               my $herectx = get_stat_here($linenr, $cnt, $here);
 
-                               for (my $n = 0; $n < $cnt; $n++) {
-                                       $herectx .= raw_line($linenr, $n) . "\n";
-                               }
                                WARN("MACRO_WITH_FLOW_CONTROL",
                                     "Macros with flow control statements should be avoided\n" . "$herectx");
                        }
@@ -5050,11 +5100,7 @@ sub process {
 
                                $ctx =~ s/\n*$//;
                                my $cnt = statement_rawlines($ctx);
-                               my $herectx = $here . "\n";
-
-                               for (my $n = 0; $n < $cnt; $n++) {
-                                       $herectx .= raw_line($linenr, $n) . "\n";
-                               }
+                               my $herectx = get_stat_here($linenr, $cnt, $here);
 
                                if (($stmts =~ tr/;/;/) == 1 &&
                                    $stmts !~ /^\s*(if|while|for|switch)\b/) {
@@ -5068,11 +5114,7 @@ sub process {
                        } elsif ($dstat =~ /^\+\s*#\s*define\s+$Ident.*;\s*$/) {
                                $ctx =~ s/\n*$//;
                                my $cnt = statement_rawlines($ctx);
-                               my $herectx = $here . "\n";
-
-                               for (my $n = 0; $n < $cnt; $n++) {
-                                       $herectx .= raw_line($linenr, $n) . "\n";
-                               }
+                               my $herectx = get_stat_here($linenr, $cnt, $here);
 
                                WARN("TRAILING_SEMICOLON",
                                     "macros should not use a trailing semicolon\n" . "$herectx");
@@ -5195,12 +5237,8 @@ sub process {
                                }
                        }
                        if ($level == 0 && $block =~ /^\s*\{/ && !$allowed) {
-                               my $herectx = $here . "\n";
                                my $cnt = statement_rawlines($block);
-
-                               for (my $n = 0; $n < $cnt; $n++) {
-                                       $herectx .= raw_line($linenr, $n) . "\n";
-                               }
+                               my $herectx = get_stat_here($linenr, $cnt, $here);
 
                                WARN("BRACES",
                                     "braces {} are not necessary for single statement blocks\n" . $herectx);
@@ -5776,36 +5814,50 @@ sub process {
                        }
                }
 
-               # check for vsprintf extension %p<foo> misuses
+# check for vsprintf extension %p<foo> misuses
                if ($^V && $^V ge 5.10.0 &&
                    defined $stat &&
                    $stat =~ /^\+(?![^\{]*\{\s*).*\b(\w+)\s*\(.*$String\s*,/s &&
                    $1 !~ /^_*volatile_*$/) {
-                       my $bad_extension = "";
+                       my $specifier;
+                       my $extension;
+                       my $bad_specifier = "";
+                       my $stat_real;
+
                        my $lc = $stat =~ tr@\n@@;
                        $lc = $lc + $linenr;
                        for (my $count = $linenr; $count <= $lc; $count++) {
                                my $fmt = get_quoted_string($lines[$count - 1], raw_line($count, 0));
                                $fmt =~ s/%%//g;
-                               if ($fmt =~ /(\%[\*\d\.]*p(?![\WSsBKRraEhMmIiUDdgVCbGNOx]).)/) {
-                                       $bad_extension = $1;
-                                       last;
-                               }
-                       }
-                       if ($bad_extension ne "") {
-                               my $stat_real = raw_line($linenr, 0);
-                               my $ext_type = "Invalid";
-                               my $use = "";
-                               for (my $count = $linenr + 1; $count <= $lc; $count++) {
-                                       $stat_real = $stat_real . "\n" . raw_line($count, 0);
+
+                               while ($fmt =~ /(\%[\*\d\.]*p(\w))/g) {
+                                       $specifier = $1;
+                                       $extension = $2;
+                                       if ($extension !~ /[SsBKRraEhMmIiUDdgVCbGNOx]/) {
+                                               $bad_specifier = $specifier;
+                                               last;
+                                       }
+                                       if ($extension eq "x" && !defined($stat_real)) {
+                                               if (!defined($stat_real)) {
+                                                       $stat_real = get_stat_real($linenr, $lc);
+                                               }
+                                               WARN("VSPRINTF_SPECIFIER_PX",
+                                                    "Using vsprintf specifier '\%px' potentially exposes the kernel memory layout, if you don't really need the address please consider using '\%p'.\n" . "$here\n$stat_real\n");
+                                       }
                                }
-                               if ($bad_extension =~ /p[Ff]/) {
-                                       $ext_type = "Deprecated";
-                                       $use = " - use %pS instead";
-                                       $use =~ s/pS/ps/ if ($bad_extension =~ /pf/);
+                               if ($bad_specifier ne "") {
+                                       my $stat_real = get_stat_real($linenr, $lc);
+                                       my $ext_type = "Invalid";
+                                       my $use = "";
+                                       if ($bad_specifier =~ /p[Ff]/) {
+                                               $ext_type = "Deprecated";
+                                               $use = " - use %pS instead";
+                                               $use =~ s/pS/ps/ if ($bad_specifier =~ /pf/);
+                                       }
+
+                                       WARN("VSPRINTF_POINTER_EXTENSION",
+                                            "$ext_type vsprintf pointer extension '$bad_specifier'$use\n" . "$here\n$stat_real\n");
                                }
-                               WARN("VSPRINTF_POINTER_EXTENSION",
-                                    "$ext_type vsprintf pointer extension '$bad_extension'$use\n" . "$here\n$stat_real\n");
                        }
                }
 
@@ -5918,10 +5970,7 @@ sub process {
                     $stat !~ /(?:$Compare)\s*\bsscanf\s*$balanced_parens/)) {
                        my $lc = $stat =~ tr@\n@@;
                        $lc = $lc + $linenr;
-                       my $stat_real = raw_line($linenr, 0);
-                       for (my $count = $linenr + 1; $count <= $lc; $count++) {
-                               $stat_real = $stat_real . "\n" . raw_line($count, 0);
-                       }
+                       my $stat_real = get_stat_real($linenr, $lc);
                        WARN("NAKED_SSCANF",
                             "unchecked sscanf return value\n" . "$here\n$stat_real\n");
                }
@@ -5932,10 +5981,7 @@ sub process {
                    $line =~ /\bsscanf\b/) {
                        my $lc = $stat =~ tr@\n@@;
                        $lc = $lc + $linenr;
-                       my $stat_real = raw_line($linenr, 0);
-                       for (my $count = $linenr + 1; $count <= $lc; $count++) {
-                               $stat_real = $stat_real . "\n" . raw_line($count, 0);
-                       }
+                       my $stat_real = get_stat_real($linenr, $lc);
                        if ($stat_real =~ /\bsscanf\b\s*\(\s*$FuncArg\s*,\s*("[^"]+")/) {
                                my $format = $6;
                                my $count = $format =~ tr@%@%@;
@@ -6065,12 +6111,9 @@ sub process {
                        }
                        if ($r1 !~ /^sizeof\b/ && $r2 =~ /^sizeof\s*\S/ &&
                            !($r1 =~ /^$Constant$/ || $r1 =~ /^[A-Z_][A-Z0-9_]*$/)) {
-                               my $ctx = '';
-                               my $herectx = $here . "\n";
                                my $cnt = statement_rawlines($stat);
-                               for (my $n = 0; $n < $cnt; $n++) {
-                                       $herectx .= raw_line($linenr, $n) . "\n";
-                               }
+                               my $herectx = get_stat_here($linenr, $cnt, $here);
+
                                if (WARN("ALLOC_WITH_MULTIPLY",
                                         "Prefer $newfunc over $oldfunc with multiply\n" . $herectx) &&
                                    $cnt == 1 &&
@@ -6153,12 +6196,9 @@ sub process {
                if ($^V && $^V ge 5.10.0 &&
                    defined $stat &&
                    $stat =~ /^\+[$;\s]*(?:case[$;\s]+\w+[$;\s]*:[$;\s]*|)*[$;\s]*\bdefault[$;\s]*:[$;\s]*;/g) {
-                       my $ctx = '';
-                       my $herectx = $here . "\n";
                        my $cnt = statement_rawlines($stat);
-                       for (my $n = 0; $n < $cnt; $n++) {
-                               $herectx .= raw_line($linenr, $n) . "\n";
-                       }
+                       my $herectx = get_stat_here($linenr, $cnt, $here);
+
                        WARN("DEFAULT_NO_BREAK",
                             "switch default: should use break\n" . $herectx);
                }
@@ -6211,6 +6251,12 @@ sub process {
                        }
                }
 
+# check for bool bitfields
+               if ($sline =~ /^.\s+bool\s*$Ident\s*:\s*\d+\s*;/) {
+                       WARN("BOOL_BITFIELD",
+                            "Avoid using bool as bitfield.  Prefer bool bitfields as unsigned int or u<8|16|32>\n" . $herecurr);
+               }
+
 # check for semaphores initialized locked
                if ($line =~ /^.\s*sema_init.+,\W?0\W?\)/) {
                        WARN("CONSIDER_COMPLETION",
@@ -6369,10 +6415,7 @@ sub process {
 
                                my $lc = $stat =~ tr@\n@@;
                                $lc = $lc + $linenr;
-                               my $stat_real = raw_line($linenr, 0);
-                               for (my $count = $linenr + 1; $count <= $lc; $count++) {
-                                       $stat_real = $stat_real . "\n" . raw_line($count, 0);
-                               }
+                               my $stat_real = get_stat_real($linenr, $lc);
 
                                my $skip_args = "";
                                if ($arg_pos > 1) {
@@ -6398,7 +6441,7 @@ sub process {
                }
 
 # check for uses of S_<PERMS> that could be octal for readability
-               if ($line =~ /\b($multi_mode_perms_string_search)\b/) {
+               while ($line =~ m{\b($multi_mode_perms_string_search)\b}g) {
                        my $oval = $1;
                        my $octal = perms_to_octal($oval);
                        if (WARN("SYMBOLIC_PERMS",
diff --git a/scripts/dtc/include-prefixes/cris b/scripts/dtc/include-prefixes/cris
deleted file mode 120000 (symlink)
index 736d998..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../../../arch/cris/boot/dts
\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/metag b/scripts/dtc/include-prefixes/metag
deleted file mode 120000 (symlink)
index 87a3c84..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../../../arch/metag/boot/dts
\ No newline at end of file
index d2a84cda7e8d4de1d650a5da1d899a5f5caf0960..7bc2fde023a797e7a941622a0a6d45c11fdb8b6b 100644 (file)
@@ -30,6 +30,8 @@
 #include <linux/string.h>
 #include <net/flow.h>
 
+#include <trace/events/initcall.h>
+
 #define MAX_LSM_EVM_XATTR      2
 
 /* Maximum number of letters for an LSM name string */
@@ -45,10 +47,14 @@ static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] =
 
 static void __init do_security_initcalls(void)
 {
+       int ret;
        initcall_t *call;
        call = __security_initcall_start;
+       trace_initcall_level("security");
        while (call < __security_initcall_end) {
-               (*call) ();
+               trace_initcall_start((*call));
+               ret = (*call) ();
+               trace_initcall_finish((*call), ret);
                call++;
        }
 }
index 1eeb70e439d7999646d3fab9c38da5ea60cf1b60..4cafe6a19167613cb64b29ac59c895e91285b390 100644 (file)
@@ -6006,6 +6006,7 @@ static int selinux_msg_queue_msgctl(struct kern_ipc_perm *msq, int cmd)
                                    SECCLASS_SYSTEM, SYSTEM__IPC_INFO, NULL);
        case IPC_STAT:
        case MSG_STAT:
+       case MSG_STAT_ANY:
                perms = MSGQ__GETATTR | MSGQ__ASSOCIATE;
                break;
        case IPC_SET:
@@ -6157,6 +6158,7 @@ static int selinux_shm_shmctl(struct kern_ipc_perm *shp, int cmd)
                                    SECCLASS_SYSTEM, SYSTEM__IPC_INFO, NULL);
        case IPC_STAT:
        case SHM_STAT:
+       case SHM_STAT_ANY:
                perms = SHM__GETATTR | SHM__ASSOCIATE;
                break;
        case IPC_SET:
@@ -6272,6 +6274,7 @@ static int selinux_sem_semctl(struct kern_ipc_perm *sma, int cmd)
                break;
        case IPC_STAT:
        case SEM_STAT:
+       case SEM_STAT_ANY:
                perms = SEM__GETATTR | SEM__ASSOCIATE;
                break;
        default:
index 73549007bf9e63d75920376b71671d5bd55ca3f0..0b414836bebdcc619c49dd0d4747510b8d6be376 100644 (file)
@@ -3046,6 +3046,7 @@ static int smack_shm_shmctl(struct kern_ipc_perm *isp, int cmd)
        switch (cmd) {
        case IPC_STAT:
        case SHM_STAT:
+       case SHM_STAT_ANY:
                may = MAY_READ;
                break;
        case IPC_SET:
@@ -3139,6 +3140,7 @@ static int smack_sem_semctl(struct kern_ipc_perm *isp, int cmd)
        case GETALL:
        case IPC_STAT:
        case SEM_STAT:
+       case SEM_STAT_ANY:
                may = MAY_READ;
                break;
        case SETVAL:
@@ -3228,6 +3230,7 @@ static int smack_msg_queue_msgctl(struct kern_ipc_perm *isp, int cmd)
        switch (cmd) {
        case IPC_STAT:
        case MSG_STAT:
+       case MSG_STAT_ANY:
                may = MAY_READ;
                break;
        case IPC_SET:
index 481ab0e94ffad339ae96e4744e2b338ee8f3bad8..1980f68246cb84003a3ae29a6db61e951bf4ec54 100644 (file)
@@ -1128,13 +1128,14 @@ static int snd_pcm_oss_get_active_substream(struct snd_pcm_oss_file *pcm_oss_fil
 }
 
 /* call with params_lock held */
+/* NOTE: this always call PREPARE unconditionally no matter whether
+ * runtime->oss.prepare is set or not
+ */
 static int snd_pcm_oss_prepare(struct snd_pcm_substream *substream)
 {
        int err;
        struct snd_pcm_runtime *runtime = substream->runtime;
 
-       if (!runtime->oss.prepare)
-               return 0;
        err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_PREPARE, NULL);
        if (err < 0) {
                pcm_dbg(substream->pcm,
index b84554893fab2c6819d4ced583c856ed6846394f..35ffccea94c3eb6cfe7755b6c1da7547d018a9a0 100644 (file)
@@ -617,7 +617,7 @@ static int snd_pcm_hw_params_choose(struct snd_pcm_substream *pcm,
                        changed = snd_pcm_hw_param_first(pcm, params, *v, NULL);
                else
                        changed = snd_pcm_hw_param_last(pcm, params, *v, NULL);
-               if (snd_BUG_ON(changed < 0))
+               if (changed < 0)
                        return changed;
                if (changed == 0)
                        continue;
index ab39ccb974c6f276a53003783d2619094afd4f32..0b030d8fe3fa2c89d330981a7aa4ce69311cb6a9 100644 (file)
 #include "clock.h"
 #include "quirks.h"
 
-static struct uac_clock_source_descriptor *
-       snd_usb_find_clock_source(struct usb_host_interface *ctrl_iface,
-                                 int clock_id)
+static void *find_uac_clock_desc(struct usb_host_interface *iface, int id,
+                                bool (*validator)(void *, int), u8 type)
 {
-       struct uac_clock_source_descriptor *cs = NULL;
+       void *cs = NULL;
 
-       while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
-                                            ctrl_iface->extralen,
-                                            cs, UAC2_CLOCK_SOURCE))) {
-               if (cs->bLength >= sizeof(*cs) && cs->bClockID == clock_id)
+       while ((cs = snd_usb_find_csint_desc(iface->extra, iface->extralen,
+                                            cs, type))) {
+               if (validator(cs, id))
                        return cs;
        }
 
        return NULL;
 }
 
-static struct uac3_clock_source_descriptor *
-       snd_usb_find_clock_source_v3(struct usb_host_interface *ctrl_iface,
-                                 int clock_id)
+static bool validate_clock_source_v2(void *p, int id)
 {
-       struct uac3_clock_source_descriptor *cs = NULL;
-
-       while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
-                                            ctrl_iface->extralen,
-                                            cs, UAC3_CLOCK_SOURCE))) {
-               if (cs->bClockID == clock_id)
-                       return cs;
-       }
-
-       return NULL;
+       struct uac_clock_source_descriptor *cs = p;
+       return cs->bLength == sizeof(*cs) && cs->bClockID == id;
 }
 
-static struct uac_clock_selector_descriptor *
-       snd_usb_find_clock_selector(struct usb_host_interface *ctrl_iface,
-                                   int clock_id)
+static bool validate_clock_source_v3(void *p, int id)
 {
-       struct uac_clock_selector_descriptor *cs = NULL;
-
-       while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
-                                            ctrl_iface->extralen,
-                                            cs, UAC2_CLOCK_SELECTOR))) {
-               if (cs->bLength >= sizeof(*cs) && cs->bClockID == clock_id) {
-                       if (cs->bLength < 5 + cs->bNrInPins)
-                               return NULL;
-                       return cs;
-               }
-       }
-
-       return NULL;
+       struct uac3_clock_source_descriptor *cs = p;
+       return cs->bLength == sizeof(*cs) && cs->bClockID == id;
 }
 
-static struct uac3_clock_selector_descriptor *
-       snd_usb_find_clock_selector_v3(struct usb_host_interface *ctrl_iface,
-                                   int clock_id)
+static bool validate_clock_selector_v2(void *p, int id)
 {
-       struct uac3_clock_selector_descriptor *cs = NULL;
-
-       while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
-                                            ctrl_iface->extralen,
-                                            cs, UAC3_CLOCK_SELECTOR))) {
-               if (cs->bClockID == clock_id)
-                       return cs;
-       }
-
-       return NULL;
+       struct uac_clock_selector_descriptor *cs = p;
+       return cs->bLength >= sizeof(*cs) && cs->bClockID == id &&
+               cs->bLength == 7 + cs->bNrInPins;
 }
 
-static struct uac_clock_multiplier_descriptor *
-       snd_usb_find_clock_multiplier(struct usb_host_interface *ctrl_iface,
-                                     int clock_id)
+static bool validate_clock_selector_v3(void *p, int id)
 {
-       struct uac_clock_multiplier_descriptor *cs = NULL;
-
-       while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
-                                            ctrl_iface->extralen,
-                                            cs, UAC2_CLOCK_MULTIPLIER))) {
-               if (cs->bLength >= sizeof(*cs) && cs->bClockID == clock_id)
-                       return cs;
-       }
-
-       return NULL;
+       struct uac3_clock_selector_descriptor *cs = p;
+       return cs->bLength >= sizeof(*cs) && cs->bClockID == id &&
+               cs->bLength == 11 + cs->bNrInPins;
 }
 
-static struct uac3_clock_multiplier_descriptor *
-       snd_usb_find_clock_multiplier_v3(struct usb_host_interface *ctrl_iface,
-                                     int clock_id)
+static bool validate_clock_multiplier_v2(void *p, int id)
 {
-       struct uac3_clock_multiplier_descriptor *cs = NULL;
+       struct uac_clock_multiplier_descriptor *cs = p;
+       return cs->bLength == sizeof(*cs) && cs->bClockID == id;
+}
 
-       while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
-                                            ctrl_iface->extralen,
-                                            cs, UAC3_CLOCK_MULTIPLIER))) {
-               if (cs->bClockID == clock_id)
-                       return cs;
-       }
+static bool validate_clock_multiplier_v3(void *p, int id)
+{
+       struct uac3_clock_multiplier_descriptor *cs = p;
+       return cs->bLength == sizeof(*cs) && cs->bClockID == id;
+}
 
-       return NULL;
+#define DEFINE_FIND_HELPER(name, obj, validator, type)         \
+static obj *name(struct usb_host_interface *iface, int id)     \
+{                                                              \
+       return find_uac_clock_desc(iface, id, validator, type); \
 }
 
+DEFINE_FIND_HELPER(snd_usb_find_clock_source,
+                  struct uac_clock_source_descriptor,
+                  validate_clock_source_v2, UAC2_CLOCK_SOURCE);
+DEFINE_FIND_HELPER(snd_usb_find_clock_source_v3,
+                  struct uac3_clock_source_descriptor,
+                  validate_clock_source_v3, UAC3_CLOCK_SOURCE);
+
+DEFINE_FIND_HELPER(snd_usb_find_clock_selector,
+                  struct uac_clock_selector_descriptor,
+                  validate_clock_selector_v2, UAC2_CLOCK_SELECTOR);
+DEFINE_FIND_HELPER(snd_usb_find_clock_selector_v3,
+                  struct uac3_clock_selector_descriptor,
+                  validate_clock_selector_v3, UAC3_CLOCK_SELECTOR);
+
+DEFINE_FIND_HELPER(snd_usb_find_clock_multiplier,
+                  struct uac_clock_multiplier_descriptor,
+                  validate_clock_multiplier_v2, UAC2_CLOCK_MULTIPLIER);
+DEFINE_FIND_HELPER(snd_usb_find_clock_multiplier_v3,
+                  struct uac3_clock_multiplier_descriptor,
+                  validate_clock_multiplier_v3, UAC3_CLOCK_MULTIPLIER);
+
 static int uac_clock_selector_get_val(struct snd_usb_audio *chip, int selector_id)
 {
        unsigned char buf;
index 4ed569fcb1390cea11b287aa4b34ef3b0851385e..b21b586b985424a03338023f96a3e9e3d996b2af 100644 (file)
@@ -7,6 +7,7 @@
 
 #define spinlock_t             pthread_mutex_t
 #define DEFINE_SPINLOCK(x)     pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER;
+#define __SPIN_LOCK_UNLOCKED(x)        (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER
 
 #define spin_lock_irqsave(x, f)                (void)f, pthread_mutex_lock(x)
 #define spin_unlock_irqrestore(x, f)   (void)f, pthread_mutex_unlock(x)
diff --git a/tools/testing/ktest/config-bisect.pl b/tools/testing/ktest/config-bisect.pl
new file mode 100755 (executable)
index 0000000..b28feea
--- /dev/null
@@ -0,0 +1,770 @@
+#!/usr/bin/perl -w
+#
+# Copyright 2015 - Steven Rostedt, Red Hat Inc.
+# Copyright 2017 - Steven Rostedt, VMware, Inc.
+#
+# Licensed under the terms of the GNU GPL License version 2
+#
+
+# usage:
+#  config-bisect.pl [options] good-config bad-config [good|bad]
+#
+
+# Compares a good config to a bad config, then takes half of the diffs
+# and produces a config that is somewhere between the good config and
+# the bad config. That is, the resulting config will start with the
+# good config and will try to make half of the differences of between
+# the good and bad configs match the bad config. It tries because of
+# dependencies between the two configs it may not be able to change
+# exactly half of the configs that are different between the two config
+# files.
+
+# Here's a normal way to use it:
+#
+#  $ cd /path/to/linux/kernel
+#  $ config-bisect.pl /path/to/good/config /path/to/bad/config
+
+# This will now pull in good config (blowing away .config in that directory
+# so do not make that be one of the good or bad configs), and then
+# build the config with "make oldconfig" to make sure it matches the
+# current kernel. It will then store the configs in that result for
+# the good config. It does the same for the bad config as well.
+# The algorithm will run, merging half of the differences between
+# the two configs and building them with "make oldconfig" to make sure
+# the result changes (dependencies may reset changes the tool had made).
+# It then copies the result of its good config to /path/to/good/config.tmp
+# and the bad config to /path/to/bad/config.tmp (just appends ".tmp" to the
+# files passed in). And the ".config" that you should test will be in
+# directory
+
+# After the first run, determine if the result is good or bad then
+# run the same command appending the result
+
+# For good results:
+#  $ config-bisect.pl /path/to/good/config /path/to/bad/config good
+
+# For bad results:
+#  $ config-bisect.pl /path/to/good/config /path/to/bad/config bad
+
+# Do not change the good-config or bad-config, config-bisect.pl will
+# copy the good-config to a temp file with the same name as good-config
+# but with a ".tmp" after it. It will do the same with the bad-config.
+
+# If "good" or "bad" is not stated at the end, it will copy the good and
+# bad configs to the .tmp versions. If a .tmp version already exists, it will
+# warn before writing over them (-r will not warn, and just write over them).
+# If the last config is labeled "good", then it will copy it to the good .tmp
+# version. If the last config is labeled "bad", it will copy it to the bad
+# .tmp version. It will continue this until it can not merge the two any more
+# without the result being equal to either the good or bad .tmp configs.
+
+my $start = 0;
+my $val = "";
+
+my $pwd = `pwd`;
+chomp $pwd;
+my $tree = $pwd;
+my $build;
+
+my $output_config;
+my $reset_bisect;
+
+sub usage {
+    print << "EOF"
+
+usage: config-bisect.pl [-l linux-tree][-b build-dir] good-config bad-config [good|bad]
+  -l [optional] define location of linux-tree (default is current directory)
+  -b [optional] define location to build (O=build-dir) (default is linux-tree)
+  good-config the config that is considered good
+  bad-config the config that does not work
+  "good" add this if the last run produced a good config
+  "bad" add this if the last run produced a bad config
+  If "good" or "bad" is not specified, then it is the start of a new bisect
+
+  Note, each run will create copy of good and bad configs with ".tmp" appended.
+
+EOF
+;
+
+    exit(-1);
+}
+
+sub doprint {
+    print @_;
+}
+
+sub dodie {
+    doprint "CRITICAL FAILURE... ", @_, "\n";
+
+    die @_, "\n";
+}
+
+sub expand_path {
+    my ($file) = @_;
+
+    if ($file =~ m,^/,) {
+       return $file;
+    }
+    return "$pwd/$file";
+}
+
+sub read_prompt {
+    my ($cancel, $prompt) = @_;
+
+    my $ans;
+
+    for (;;) {
+       if ($cancel) {
+           print "$prompt [y/n/C] ";
+       } else {
+           print "$prompt [y/N] ";
+       }
+       $ans = <STDIN>;
+       chomp $ans;
+       if ($ans =~ /^\s*$/) {
+           if ($cancel) {
+               $ans = "c";
+           } else {
+               $ans = "n";
+           }
+       }
+       last if ($ans =~ /^y$/i || $ans =~ /^n$/i);
+       if ($cancel) {
+           last if ($ans =~ /^c$/i);
+           print "Please answer either 'y', 'n' or 'c'.\n";
+       } else {
+           print "Please answer either 'y' or 'n'.\n";
+       }
+    }
+    if ($ans =~ /^c/i) {
+       exit;
+    }
+    if ($ans !~ /^y$/i) {
+       return 0;
+    }
+    return 1;
+}
+
+sub read_yn {
+    my ($prompt) = @_;
+
+    return read_prompt 0, $prompt;
+}
+
+sub read_ync {
+    my ($prompt) = @_;
+
+    return read_prompt 1, $prompt;
+}
+
+sub run_command {
+    my ($command, $redirect) = @_;
+    my $start_time;
+    my $end_time;
+    my $dord = 0;
+    my $pid;
+
+    $start_time = time;
+
+    doprint("$command ... ");
+
+    $pid = open(CMD, "$command 2>&1 |") or
+       dodie "unable to exec $command";
+
+    if (defined($redirect)) {
+       open (RD, ">$redirect") or
+           dodie "failed to write to redirect $redirect";
+       $dord = 1;
+    }
+
+    while (<CMD>) {
+       print RD  if ($dord);
+    }
+
+    waitpid($pid, 0);
+    my $failed = $?;
+
+    close(CMD);
+    close(RD)  if ($dord);
+
+    $end_time = time;
+    my $delta = $end_time - $start_time;
+
+    if ($delta == 1) {
+       doprint "[1 second] ";
+    } else {
+       doprint "[$delta seconds] ";
+    }
+
+    if ($failed) {
+       doprint "FAILED!\n";
+    } else {
+       doprint "SUCCESS\n";
+    }
+
+    return !$failed;
+}
+
+###### CONFIG BISECT ######
+
+# config_ignore holds the configs that were set (or unset) for
+# a good config and we will ignore these configs for the rest
+# of a config bisect. These configs stay as they were.
+my %config_ignore;
+
+# config_set holds what all configs were set as.
+my %config_set;
+
+# config_off holds the set of configs that the bad config had disabled.
+# We need to record them and set them in the .config when running
+# olddefconfig, because olddefconfig keeps the defaults.
+my %config_off;
+
+# config_off_tmp holds a set of configs to turn off for now
+my @config_off_tmp;
+
+# config_list is the set of configs that are being tested
+my %config_list;
+my %null_config;
+
+my %dependency;
+
+my $make;
+
+sub make_oldconfig {
+
+    if (!run_command "$make olddefconfig") {
+       # Perhaps olddefconfig doesn't exist in this version of the kernel
+       # try oldnoconfig
+       doprint "olddefconfig failed, trying make oldnoconfig\n";
+       if (!run_command "$make oldnoconfig") {
+           doprint "oldnoconfig failed, trying yes '' | make oldconfig\n";
+           # try a yes '' | oldconfig
+           run_command "yes '' | $make oldconfig" or
+               dodie "failed make config oldconfig";
+       }
+    }
+}
+
+sub assign_configs {
+    my ($hash, $config) = @_;
+
+    doprint "Reading configs from $config\n";
+
+    open (IN, $config)
+       or dodie "Failed to read $config";
+
+    while (<IN>) {
+       chomp;
+       if (/^((CONFIG\S*)=.*)/) {
+           ${$hash}{$2} = $1;
+       } elsif (/^(# (CONFIG\S*) is not set)/) {
+           ${$hash}{$2} = $1;
+       }
+    }
+
+    close(IN);
+}
+
+sub process_config_ignore {
+    my ($config) = @_;
+
+    assign_configs \%config_ignore, $config;
+}
+
+sub get_dependencies {
+    my ($config) = @_;
+
+    my $arr = $dependency{$config};
+    if (!defined($arr)) {
+       return ();
+    }
+
+    my @deps = @{$arr};
+
+    foreach my $dep (@{$arr}) {
+       print "ADD DEP $dep\n";
+       @deps = (@deps, get_dependencies $dep);
+    }
+
+    return @deps;
+}
+
+sub save_config {
+    my ($pc, $file) = @_;
+
+    my %configs = %{$pc};
+
+    doprint "Saving configs into $file\n";
+
+    open(OUT, ">$file") or dodie "Can not write to $file";
+
+    foreach my $config (keys %configs) {
+       print OUT "$configs{$config}\n";
+    }
+    close(OUT);
+}
+
+sub create_config {
+    my ($name, $pc) = @_;
+
+    doprint "Creating old config from $name configs\n";
+
+    save_config $pc, $output_config;
+
+    make_oldconfig;
+}
+
+# compare two config hashes, and return configs with different vals.
+# It returns B's config values, but you can use A to see what A was.
+sub diff_config_vals {
+    my ($pa, $pb) = @_;
+
+    # crappy Perl way to pass in hashes.
+    my %a = %{$pa};
+    my %b = %{$pb};
+
+    my %ret;
+
+    foreach my $item (keys %a) {
+       if (defined($b{$item}) && $b{$item} ne $a{$item}) {
+           $ret{$item} = $b{$item};
+       }
+    }
+
+    return %ret;
+}
+
+# compare two config hashes and return the configs in B but not A
+sub diff_configs {
+    my ($pa, $pb) = @_;
+
+    my %ret;
+
+    # crappy Perl way to pass in hashes.
+    my %a = %{$pa};
+    my %b = %{$pb};
+
+    foreach my $item (keys %b) {
+       if (!defined($a{$item})) {
+           $ret{$item} = $b{$item};
+       }
+    }
+
+    return %ret;
+}
+
+# return if two configs are equal or not
+# 0 is equal +1 b has something a does not
+# +1 if a and b have a different item.
+# -1 if a has something b does not
+sub compare_configs {
+    my ($pa, $pb) = @_;
+
+    my %ret;
+
+    # crappy Perl way to pass in hashes.
+    my %a = %{$pa};
+    my %b = %{$pb};
+
+    foreach my $item (keys %b) {
+       if (!defined($a{$item})) {
+           return 1;
+       }
+       if ($a{$item} ne $b{$item}) {
+           return 1;
+       }
+    }
+
+    foreach my $item (keys %a) {
+       if (!defined($b{$item})) {
+           return -1;
+       }
+    }
+
+    return 0;
+}
+
+sub process_failed {
+    my ($config) = @_;
+
+    doprint "\n\n***************************************\n";
+    doprint "Found bad config: $config\n";
+    doprint "***************************************\n\n";
+}
+
+sub process_new_config {
+    my ($tc, $nc, $gc, $bc) = @_;
+
+    my %tmp_config = %{$tc};
+    my %good_configs = %{$gc};
+    my %bad_configs = %{$bc};
+
+    my %new_configs;
+
+    my $runtest = 1;
+    my $ret;
+
+    create_config "tmp_configs", \%tmp_config;
+    assign_configs \%new_configs, $output_config;
+
+    $ret = compare_configs \%new_configs, \%bad_configs;
+    if (!$ret) {
+       doprint "New config equals bad config, try next test\n";
+       $runtest = 0;
+    }
+
+    if ($runtest) {
+       $ret = compare_configs \%new_configs, \%good_configs;
+       if (!$ret) {
+           doprint "New config equals good config, try next test\n";
+           $runtest = 0;
+       }
+    }
+
+    %{$nc} = %new_configs;
+
+    return $runtest;
+}
+
+sub convert_config {
+    my ($config) = @_;
+
+    if ($config =~ /^# (.*) is not set/) {
+       $config = "$1=n";
+    }
+
+    $config =~ s/^CONFIG_//;
+    return $config;
+}
+
+sub print_config {
+    my ($sym, $config) = @_;
+
+    $config = convert_config $config;
+    doprint "$sym$config\n";
+}
+
+sub print_config_compare {
+    my ($good_config, $bad_config) = @_;
+
+    $good_config = convert_config $good_config;
+    $bad_config = convert_config $bad_config;
+
+    my $good_value = $good_config;
+    my $bad_value = $bad_config;
+    $good_value =~ s/(.*)=//;
+    my $config = $1;
+
+    $bad_value =~ s/.*=//;
+
+    doprint " $config $good_value -> $bad_value\n";
+}
+
+# Pass in:
+# $phalf: half of the configs names you want to add
+# $oconfigs: The original configs to start with
+# $sconfigs: The source to update $oconfigs with (from $phalf)
+# $which: The name of which half that is updating (top / bottom)
+# $type: The name of the source type (good / bad)
+sub make_half {
+    my ($phalf, $oconfigs, $sconfigs, $which, $type) = @_;
+
+    my @half = @{$phalf};
+    my %orig_configs = %{$oconfigs};
+    my %source_configs = %{$sconfigs};
+
+    my %tmp_config = %orig_configs;
+
+    doprint "Settings bisect with $which half of $type configs:\n";
+    foreach my $item (@half) {
+       doprint "Updating $item to $source_configs{$item}\n";
+       $tmp_config{$item} = $source_configs{$item};
+    }
+
+    return %tmp_config;
+}
+
+sub run_config_bisect {
+    my ($pgood, $pbad) = @_;
+
+    my %good_configs = %{$pgood};
+    my %bad_configs = %{$pbad};
+
+    my %diff_configs = diff_config_vals \%good_configs, \%bad_configs;
+    my %b_configs = diff_configs \%good_configs, \%bad_configs;
+    my %g_configs = diff_configs \%bad_configs, \%good_configs;
+
+    # diff_arr is what is in both good and bad but are different (y->n)
+    my @diff_arr = keys %diff_configs;
+    my $len_diff = $#diff_arr + 1;
+
+    # b_arr is what is in bad but not in good (has depends)
+    my @b_arr = keys %b_configs;
+    my $len_b = $#b_arr + 1;
+
+    # g_arr is what is in good but not in bad
+    my @g_arr = keys %g_configs;
+    my $len_g = $#g_arr + 1;
+
+    my $runtest = 0;
+    my %new_configs;
+    my $ret;
+
+    # Look at the configs that are different between good and bad.
+    # This does not include those that depend on other configs
+    #  (configs depending on other configs that are not set would
+    #   not show up even as a "# CONFIG_FOO is not set")
+
+
+    doprint "# of configs to check:             $len_diff\n";
+    doprint "# of configs showing only in good: $len_g\n";
+    doprint "# of configs showing only in bad:  $len_b\n";
+
+    if ($len_diff > 0) {
+       # Now test for different values
+
+       doprint "Configs left to check:\n";
+       doprint "  Good Config\t\t\tBad Config\n";
+       doprint "  -----------\t\t\t----------\n";
+       foreach my $item (@diff_arr) {
+           doprint "  $good_configs{$item}\t$bad_configs{$item}\n";
+       }
+
+       my $half = int($#diff_arr / 2);
+       my @tophalf = @diff_arr[0 .. $half];
+
+       doprint "Set tmp config to be good config with some bad config values\n";
+
+       my %tmp_config = make_half \@tophalf, \%good_configs,
+           \%bad_configs, "top", "bad";
+
+       $runtest = process_new_config \%tmp_config, \%new_configs,
+                           \%good_configs, \%bad_configs;
+
+       if (!$runtest) {
+           doprint "Set tmp config to be bad config with some good config values\n";
+
+           my %tmp_config = make_half \@tophalf, \%bad_configs,
+               \%good_configs, "top", "good";
+
+           $runtest = process_new_config \%tmp_config, \%new_configs,
+               \%good_configs, \%bad_configs;
+       }
+    }
+
+    if (!$runtest && $len_diff > 0) {
+       # do the same thing, but this time with bottom half
+
+       my $half = int($#diff_arr / 2);
+       my @bottomhalf = @diff_arr[$half+1 .. $#diff_arr];
+
+       doprint "Set tmp config to be good config with some bad config values\n";
+
+       my %tmp_config = make_half \@bottomhalf, \%good_configs,
+           \%bad_configs, "bottom", "bad";
+
+       $runtest = process_new_config \%tmp_config, \%new_configs,
+                           \%good_configs, \%bad_configs;
+
+       if (!$runtest) {
+           doprint "Set tmp config to be bad config with some good config values\n";
+
+           my %tmp_config = make_half \@bottomhalf, \%bad_configs,
+               \%good_configs, "bottom", "good";
+
+           $runtest = process_new_config \%tmp_config, \%new_configs,
+               \%good_configs, \%bad_configs;
+       }
+    }
+
+    if ($runtest) {
+       make_oldconfig;
+       doprint "READY TO TEST .config IN $build\n";
+       return 0;
+    }
+
+    doprint "\n%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n";
+    doprint "Hmm, can't make any more changes without making good == bad?\n";
+    doprint "Difference between good (+) and bad (-)\n";
+
+    foreach my $item (keys %bad_configs) {
+       if (!defined($good_configs{$item})) {
+           print_config "-", $bad_configs{$item};
+       }
+    }
+
+    foreach my $item (keys %good_configs) {
+       next if (!defined($bad_configs{$item}));
+       if ($good_configs{$item} ne $bad_configs{$item}) {
+           print_config_compare $good_configs{$item}, $bad_configs{$item};
+       }
+    }
+
+    foreach my $item (keys %good_configs) {
+       if (!defined($bad_configs{$item})) {
+           print_config "+", $good_configs{$item};
+       }
+    }
+    return -1;
+}
+
+sub config_bisect {
+    my ($good_config, $bad_config) = @_;
+    my $ret;
+
+    my %good_configs;
+    my %bad_configs;
+    my %tmp_configs;
+
+    doprint "Run good configs through make oldconfig\n";
+    assign_configs \%tmp_configs, $good_config;
+    create_config "$good_config", \%tmp_configs;
+    assign_configs \%good_configs, $output_config;
+
+    doprint "Run bad configs through make oldconfig\n";
+    assign_configs \%tmp_configs, $bad_config;
+    create_config "$bad_config", \%tmp_configs;
+    assign_configs \%bad_configs, $output_config;
+
+    save_config \%good_configs, $good_config;
+    save_config \%bad_configs, $bad_config;
+
+    return run_config_bisect \%good_configs, \%bad_configs;
+}
+
+while ($#ARGV >= 0) {
+    if ($ARGV[0] !~ m/^-/) {
+       last;
+    }
+    my $opt = shift @ARGV;
+
+    if ($opt eq "-b") {
+       $val = shift @ARGV;
+       if (!defined($val)) {
+           die "-b requires value\n";
+       }
+       $build = $val;
+    }
+
+    elsif ($opt eq "-l") {
+       $val = shift @ARGV;
+       if (!defined($val)) {
+           die "-l requires value\n";
+       }
+       $tree = $val;
+    }
+
+    elsif ($opt eq "-r") {
+       $reset_bisect = 1;
+    }
+
+    elsif ($opt eq "-h") {
+       usage;
+    }
+
+    else {
+       die "Unknow option $opt\n";
+    }
+}
+
+$build = $tree if (!defined($build));
+
+$tree = expand_path $tree;
+$build = expand_path $build;
+
+if ( ! -d $tree ) {
+    die "$tree not a directory\n";
+}
+
+if ( ! -d $build ) {
+    die "$build not a directory\n";
+}
+
+usage if $#ARGV < 1;
+
+if ($#ARGV == 1) {
+    $start = 1;
+} elsif ($#ARGV == 2) {
+    $val = $ARGV[2];
+    if ($val ne "good" && $val ne "bad") {
+       die "Unknown command '$val', bust be either \"good\" or \"bad\"\n";
+    }
+} else {
+    usage;
+}
+
+my $good_start = expand_path $ARGV[0];
+my $bad_start = expand_path $ARGV[1];
+
+my $good = "$good_start.tmp";
+my $bad = "$bad_start.tmp";
+
+$make = "make";
+
+if ($build ne $tree) {
+    $make = "make O=$build"
+}
+
+$output_config = "$build/.config";
+
+if ($start) {
+    if ( ! -f $good_start ) {
+       die "$good_start not found\n";
+    }
+    if ( ! -f $bad_start ) {
+       die "$bad_start not found\n";
+    }
+    if ( -f $good || -f $bad ) {
+       my $p = "";
+
+       if ( -f $good ) {
+           $p = "$good exists\n";
+       }
+
+       if ( -f $bad ) {
+           $p = "$p$bad exists\n";
+       }
+
+       if (!defined($reset_bisect)) {
+           if (!read_yn "${p}Overwrite and start new bisect anyway?") {
+               exit (-1);
+           }
+       }
+    }
+    run_command "cp $good_start $good" or die "failed to copy to $good\n";
+    run_command "cp $bad_start $bad" or die "faield to copy to $bad\n";
+} else {
+    if ( ! -f $good ) {
+       die "Can not find file $good\n";
+    }
+    if ( ! -f $bad ) {
+       die "Can not find file $bad\n";
+    }
+    if ($val eq "good") {
+       run_command "cp $output_config $good" or die "failed to copy $config to $good\n";
+    } elsif ($val eq "bad") {
+       run_command "cp $output_config $bad" or die "failed to copy $config to $bad\n";
+    }
+}
+
+chdir $tree || die "can't change directory to $tree";
+
+my $ret = config_bisect $good, $bad;
+
+if (!$ret) {
+    exit(0);
+}
+
+if ($ret > 0) {
+    doprint "Cleaning temp files\n";
+    run_command "rm $good";
+    run_command "rm $bad";
+    exit(1);
+} else {
+    doprint "See good and bad configs for details:\n";
+    doprint "good: $good\n";
+    doprint "bad:  $bad\n";
+    doprint "%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n";
+}
+exit(2);
index 8809f244bb7c353b0d3cacbe6f8d038cf84134d5..87af8a68ab2529ffc005ed2bd66aff93944bd3ca 100755 (executable)
@@ -10,6 +10,7 @@ use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
 use File::Path qw(mkpath);
 use File::Copy qw(cp);
 use FileHandle;
+use FindBin;
 
 my $VERSION = "0.2";
 
@@ -22,6 +23,11 @@ my %evals;
 
 #default opts
 my %default = (
+    "MAILER"                   => "sendmail",  # default mailer
+    "EMAIL_ON_ERROR"           => 1,
+    "EMAIL_WHEN_FINISHED"      => 1,
+    "EMAIL_WHEN_CANCELED"      => 0,
+    "EMAIL_WHEN_STARTED"       => 0,
     "NUM_TESTS"                        => 1,
     "TEST_TYPE"                        => "build",
     "BUILD_TYPE"               => "randconfig",
@@ -59,6 +65,7 @@ my %default = (
     "GRUB_REBOOT"              => "grub2-reboot",
     "SYSLINUX"                 => "extlinux",
     "SYSLINUX_PATH"            => "/boot/extlinux",
+    "CONNECT_TIMEOUT"          => 25,
 
 # required, and we will ask users if they don't have them but we keep the default
 # value something that is common.
@@ -163,6 +170,8 @@ my $store_failures;
 my $store_successes;
 my $test_name;
 my $timeout;
+my $connect_timeout;
+my $config_bisect_exec;
 my $booted_timeout;
 my $detect_triplefault;
 my $console;
@@ -204,6 +213,20 @@ my $install_time;
 my $reboot_time;
 my $test_time;
 
+my $pwd;
+my $dirname = $FindBin::Bin;
+
+my $mailto;
+my $mailer;
+my $mail_path;
+my $mail_command;
+my $email_on_error;
+my $email_when_finished;
+my $email_when_started;
+my $email_when_canceled;
+
+my $script_start_time = localtime();
+
 # set when a test is something other that just building or install
 # which would require more options.
 my $buildonly = 1;
@@ -229,6 +252,14 @@ my $no_reboot = 1;
 my $reboot_success = 0;
 
 my %option_map = (
+    "MAILTO"                   => \$mailto,
+    "MAILER"                   => \$mailer,
+    "MAIL_PATH"                        => \$mail_path,
+    "MAIL_COMMAND"             => \$mail_command,
+    "EMAIL_ON_ERROR"           => \$email_on_error,
+    "EMAIL_WHEN_FINISHED"      => \$email_when_finished,
+    "EMAIL_WHEN_STARTED"       => \$email_when_started,
+    "EMAIL_WHEN_CANCELED"      => \$email_when_canceled,
     "MACHINE"                  => \$machine,
     "SSH_USER"                 => \$ssh_user,
     "TMP_DIR"                  => \$tmpdir,
@@ -296,6 +327,8 @@ my %option_map = (
     "STORE_SUCCESSES"          => \$store_successes,
     "TEST_NAME"                        => \$test_name,
     "TIMEOUT"                  => \$timeout,
+    "CONNECT_TIMEOUT"          => \$connect_timeout,
+    "CONFIG_BISECT_EXEC"       => \$config_bisect_exec,
     "BOOTED_TIMEOUT"           => \$booted_timeout,
     "CONSOLE"                  => \$console,
     "CLOSE_CONSOLE_SIGNAL"     => \$close_console_signal,
@@ -337,6 +370,7 @@ my %used_options;
 
 # default variables that can be used
 chomp ($variable{"PWD"} = `pwd`);
+$pwd = $variable{"PWD"};
 
 $config_help{"MACHINE"} = << "EOF"
  The machine hostname that you will test.
@@ -718,21 +752,13 @@ sub set_value {
 
     my $prvalue = process_variables($rvalue);
 
-    if ($buildonly && $lvalue =~ /^TEST_TYPE(\[.*\])?$/ && $prvalue ne "build") {
+    if ($lvalue =~ /^(TEST|BISECT|CONFIG_BISECT)_TYPE(\[.*\])?$/ &&
+       $prvalue !~ /^(config_|)bisect$/ &&
+       $prvalue !~ /^build$/ &&
+       $buildonly) {
+
        # Note if a test is something other than build, then we
        # will need other mandatory options.
-       if ($prvalue ne "install") {
-           # for bisect, we need to check BISECT_TYPE
-           if ($prvalue ne "bisect") {
-               $buildonly = 0;
-           }
-       } else {
-           # install still limits some mandatory options.
-           $buildonly = 2;
-       }
-    }
-
-    if ($buildonly && $lvalue =~ /^BISECT_TYPE(\[.*\])?$/ && $prvalue ne "build") {
        if ($prvalue ne "install") {
            $buildonly = 0;
        } else {
@@ -1140,7 +1166,8 @@ sub __read_config {
 sub get_test_case {
        print "What test case would you like to run?\n";
        print " (build, install or boot)\n";
-       print " Other tests are available but require editing the config file\n";
+       print " Other tests are available but require editing ktest.conf\n";
+       print " (see tools/testing/ktest/sample.conf)\n";
        my $ans = <STDIN>;
        chomp $ans;
        $default{"TEST_TYPE"} = $ans;
@@ -1328,8 +1355,8 @@ sub reboot {
     my ($time) = @_;
     my $powercycle = 0;
 
-    # test if the machine can be connected to within 5 seconds
-    my $stat = run_ssh("echo check machine status", 5);
+    # test if the machine can be connected to within a few seconds
+    my $stat = run_ssh("echo check machine status", $connect_timeout);
     if (!$stat) {
        doprint("power cycle\n");
        $powercycle = 1;
@@ -1404,10 +1431,18 @@ sub do_not_reboot {
 
     return $test_type eq "build" || $no_reboot ||
        ($test_type eq "patchcheck" && $opt{"PATCHCHECK_TYPE[$i]"} eq "build") ||
-       ($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build");
+       ($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build") ||
+       ($test_type eq "config_bisect" && $opt{"CONFIG_BISECT_TYPE[$i]"} eq "build");
 }
 
+my $in_die = 0;
+
 sub dodie {
+
+    # avoid recursion
+    return if ($in_die);
+    $in_die = 1;
+
     doprint "CRITICAL FAILURE... ", @_, "\n";
 
     my $i = $iteration;
@@ -1426,6 +1461,11 @@ sub dodie {
        print " See $opt{LOG_FILE} for more info.\n";
     }
 
+    if ($email_on_error) {
+        send_email("KTEST: critical failure for your [$test_type] test",
+                "Your test started at $script_start_time has failed with:\n@_\n");
+    }
+
     if ($monitor_cnt) {
            # restore terminal settings
            system("stty $stty_orig");
@@ -1477,7 +1517,7 @@ sub exec_console {
     close($pts);
 
     exec $console or
-       die "Can't open console $console";
+       dodie "Can't open console $console";
 }
 
 sub open_console {
@@ -1515,6 +1555,9 @@ sub close_console {
     doprint "kill child process $pid\n";
     kill $close_console_signal, $pid;
 
+    doprint "wait for child process $pid to exit\n";
+    waitpid($pid, 0);
+
     print "closing!\n";
     close($fp);
 
@@ -1625,7 +1668,7 @@ sub save_logs {
 
        if (!-d $dir) {
            mkpath($dir) or
-               die "can't create $dir";
+               dodie "can't create $dir";
        }
 
        my %files = (
@@ -1638,7 +1681,7 @@ sub save_logs {
        while (my ($name, $source) = each(%files)) {
                if (-f "$source") {
                        cp "$source", "$dir/$name" or
-                               die "failed to copy $source";
+                               dodie "failed to copy $source";
                }
        }
 
@@ -1692,6 +1735,7 @@ sub run_command {
     my $end_time;
     my $dolog = 0;
     my $dord = 0;
+    my $dostdout = 0;
     my $pid;
 
     $command =~ s/\$SSH_USER/$ssh_user/g;
@@ -1710,9 +1754,15 @@ sub run_command {
     }
 
     if (defined($redirect)) {
-       open (RD, ">$redirect") or
-           dodie "failed to write to redirect $redirect";
-       $dord = 1;
+       if ($redirect eq 1) {
+           $dostdout = 1;
+           # Have the output of the command on its own line
+           doprint "\n";
+       } else {
+           open (RD, ">$redirect") or
+               dodie "failed to write to redirect $redirect";
+           $dord = 1;
+       }
     }
 
     my $hit_timeout = 0;
@@ -1734,6 +1784,7 @@ sub run_command {
        }
        print LOG $line if ($dolog);
        print RD $line if ($dord);
+       print $line if ($dostdout);
     }
 
     waitpid($pid, 0);
@@ -1812,7 +1863,7 @@ sub get_grub2_index {
     $ssh_grub =~ s,\$SSH_COMMAND,cat $grub_file,g;
 
     open(IN, "$ssh_grub |")
-       or die "unable to get $grub_file";
+       or dodie "unable to get $grub_file";
 
     my $found = 0;
 
@@ -1821,13 +1872,13 @@ sub get_grub2_index {
            $grub_number++;
            $found = 1;
            last;
-       } elsif (/^menuentry\s/) {
+       } elsif (/^menuentry\s|^submenu\s/) {
            $grub_number++;
        }
     }
     close(IN);
 
-    die "Could not find '$grub_menu' in $grub_file on $machine"
+    dodie "Could not find '$grub_menu' in $grub_file on $machine"
        if (!$found);
     doprint "$grub_number\n";
     $last_grub_menu = $grub_menu;
@@ -1855,7 +1906,7 @@ sub get_grub_index {
     $ssh_grub =~ s,\$SSH_COMMAND,cat /boot/grub/menu.lst,g;
 
     open(IN, "$ssh_grub |")
-       or die "unable to get menu.lst";
+       or dodie "unable to get menu.lst";
 
     my $found = 0;
 
@@ -1870,7 +1921,7 @@ sub get_grub_index {
     }
     close(IN);
 
-    die "Could not find '$grub_menu' in /boot/grub/menu on $machine"
+    dodie "Could not find '$grub_menu' in /boot/grub/menu on $machine"
        if (!$found);
     doprint "$grub_number\n";
     $last_grub_menu = $grub_menu;
@@ -1983,7 +2034,7 @@ sub monitor {
     my $full_line = "";
 
     open(DMESG, "> $dmesg") or
-       die "unable to write to $dmesg";
+       dodie "unable to write to $dmesg";
 
     reboot_to;
 
@@ -2862,7 +2913,7 @@ sub run_bisect {
 sub update_bisect_replay {
     my $tmp_log = "$tmpdir/ktest_bisect_log";
     run_command "git bisect log > $tmp_log" or
-       die "can't create bisect log";
+       dodie "can't create bisect log";
     return $tmp_log;
 }
 
@@ -2871,9 +2922,9 @@ sub bisect {
 
     my $result;
 
-    die "BISECT_GOOD[$i] not defined\n"        if (!defined($bisect_good));
-    die "BISECT_BAD[$i] not defined\n" if (!defined($bisect_bad));
-    die "BISECT_TYPE[$i] not defined\n"        if (!defined($bisect_type));
+    dodie "BISECT_GOOD[$i] not defined\n"      if (!defined($bisect_good));
+    dodie "BISECT_BAD[$i] not defined\n"       if (!defined($bisect_bad));
+    dodie "BISECT_TYPE[$i] not defined\n"      if (!defined($bisect_type));
 
     my $good = $bisect_good;
     my $bad = $bisect_bad;
@@ -2936,7 +2987,7 @@ sub bisect {
        if ($check ne "good") {
            doprint "TESTING BISECT BAD [$bad]\n";
            run_command "git checkout $bad" or
-               die "Failed to checkout $bad";
+               dodie "Failed to checkout $bad";
 
            $result = run_bisect $type;
 
@@ -2948,7 +2999,7 @@ sub bisect {
        if ($check ne "bad") {
            doprint "TESTING BISECT GOOD [$good]\n";
            run_command "git checkout $good" or
-               die "Failed to checkout $good";
+               dodie "Failed to checkout $good";
 
            $result = run_bisect $type;
 
@@ -2959,7 +3010,7 @@ sub bisect {
 
        # checkout where we started
        run_command "git checkout $head" or
-           die "Failed to checkout $head";
+           dodie "Failed to checkout $head";
     }
 
     run_command "git bisect start$start_files" or
@@ -3092,76 +3143,6 @@ sub create_config {
     make_oldconfig;
 }
 
-# compare two config hashes, and return configs with different vals.
-# It returns B's config values, but you can use A to see what A was.
-sub diff_config_vals {
-    my ($pa, $pb) = @_;
-
-    # crappy Perl way to pass in hashes.
-    my %a = %{$pa};
-    my %b = %{$pb};
-
-    my %ret;
-
-    foreach my $item (keys %a) {
-       if (defined($b{$item}) && $b{$item} ne $a{$item}) {
-           $ret{$item} = $b{$item};
-       }
-    }
-
-    return %ret;
-}
-
-# compare two config hashes and return the configs in B but not A
-sub diff_configs {
-    my ($pa, $pb) = @_;
-
-    my %ret;
-
-    # crappy Perl way to pass in hashes.
-    my %a = %{$pa};
-    my %b = %{$pb};
-
-    foreach my $item (keys %b) {
-       if (!defined($a{$item})) {
-           $ret{$item} = $b{$item};
-       }
-    }
-
-    return %ret;
-}
-
-# return if two configs are equal or not
-# 0 is equal +1 b has something a does not
-# +1 if a and b have a different item.
-# -1 if a has something b does not
-sub compare_configs {
-    my ($pa, $pb) = @_;
-
-    my %ret;
-
-    # crappy Perl way to pass in hashes.
-    my %a = %{$pa};
-    my %b = %{$pb};
-
-    foreach my $item (keys %b) {
-       if (!defined($a{$item})) {
-           return 1;
-       }
-       if ($a{$item} ne $b{$item}) {
-           return 1;
-       }
-    }
-
-    foreach my $item (keys %a) {
-       if (!defined($b{$item})) {
-           return -1;
-       }
-    }
-
-    return 0;
-}
-
 sub run_config_bisect_test {
     my ($type) = @_;
 
@@ -3174,166 +3155,57 @@ sub run_config_bisect_test {
     return $ret;
 }
 
-sub process_failed {
-    my ($config) = @_;
+sub config_bisect_end {
+    my ($good, $bad) = @_;
+    my $diffexec = "diff -u";
 
+    if (-f "$builddir/scripts/diffconfig") {
+       $diffexec = "$builddir/scripts/diffconfig";
+    }
     doprint "\n\n***************************************\n";
-    doprint "Found bad config: $config\n";
+    doprint "No more config bisecting possible.\n";
+    run_command "$diffexec $good $bad", 1;
     doprint "***************************************\n\n";
 }
 
-# used for config bisecting
-my $good_config;
-my $bad_config;
-
-sub process_new_config {
-    my ($tc, $nc, $gc, $bc) = @_;
-
-    my %tmp_config = %{$tc};
-    my %good_configs = %{$gc};
-    my %bad_configs = %{$bc};
-
-    my %new_configs;
-
-    my $runtest = 1;
-    my $ret;
-
-    create_config "tmp_configs", \%tmp_config;
-    assign_configs \%new_configs, $output_config;
-
-    $ret = compare_configs \%new_configs, \%bad_configs;
-    if (!$ret) {
-       doprint "New config equals bad config, try next test\n";
-       $runtest = 0;
-    }
-
-    if ($runtest) {
-       $ret = compare_configs \%new_configs, \%good_configs;
-       if (!$ret) {
-           doprint "New config equals good config, try next test\n";
-           $runtest = 0;
-       }
-    }
-
-    %{$nc} = %new_configs;
-
-    return $runtest;
-}
-
 sub run_config_bisect {
-    my ($pgood, $pbad) = @_;
-
-    my $type = $config_bisect_type;
-
-    my %good_configs = %{$pgood};
-    my %bad_configs = %{$pbad};
-
-    my %diff_configs = diff_config_vals \%good_configs, \%bad_configs;
-    my %b_configs = diff_configs \%good_configs, \%bad_configs;
-    my %g_configs = diff_configs \%bad_configs, \%good_configs;
-
-    my @diff_arr = keys %diff_configs;
-    my $len_diff = $#diff_arr + 1;
-
-    my @b_arr = keys %b_configs;
-    my $len_b = $#b_arr + 1;
-
-    my @g_arr = keys %g_configs;
-    my $len_g = $#g_arr + 1;
-
-    my $runtest = 1;
-    my %new_configs;
+    my ($good, $bad, $last_result) = @_;
+    my $reset = "";
+    my $cmd;
     my $ret;
 
-    # First, lets get it down to a single subset.
-    # Is the problem with a difference in values?
-    # Is the problem with a missing config?
-    # Is the problem with a config that breaks things?
-
-    # Enable all of one set and see if we get a new bad
-    # or good config.
-
-    # first set the good config to the bad values.
-
-    doprint "d=$len_diff g=$len_g b=$len_b\n";
-
-    # first lets enable things in bad config that are enabled in good config
-
-    if ($len_diff > 0) {
-       if ($len_b > 0 || $len_g > 0) {
-           my %tmp_config = %bad_configs;
-
-           doprint "Set tmp config to be bad config with good config values\n";
-           foreach my $item (@diff_arr) {
-               $tmp_config{$item} = $good_configs{$item};
-           }
-
-           $runtest = process_new_config \%tmp_config, \%new_configs,
-                           \%good_configs, \%bad_configs;
-       }
+    if (!length($last_result)) {
+       $reset = "-r";
     }
+    run_command "$config_bisect_exec $reset -b $outputdir $good $bad $last_result", 1;
 
-    if (!$runtest && $len_diff > 0) {
-
-       if ($len_diff == 1) {
-           process_failed $diff_arr[0];
-           return 1;
-       }
-       my %tmp_config = %bad_configs;
-
-       my $half = int($#diff_arr / 2);
-       my @tophalf = @diff_arr[0 .. $half];
-
-       doprint "Settings bisect with top half:\n";
-       doprint "Set tmp config to be bad config with some good config values\n";
-       foreach my $item (@tophalf) {
-           $tmp_config{$item} = $good_configs{$item};
-       }
-
-       $runtest = process_new_config \%tmp_config, \%new_configs,
-                           \%good_configs, \%bad_configs;
-
-       if (!$runtest) {
-           my %tmp_config = %bad_configs;
-
-           doprint "Try bottom half\n";
-
-           my @bottomhalf = @diff_arr[$half+1 .. $#diff_arr];
-
-           foreach my $item (@bottomhalf) {
-               $tmp_config{$item} = $good_configs{$item};
-           }
-
-           $runtest = process_new_config \%tmp_config, \%new_configs,
-                           \%good_configs, \%bad_configs;
-       }
+    # config-bisect returns:
+    #   0 if there is more to bisect
+    #   1 for finding a good config
+    #   2 if it can not find any more configs
+    #  -1 (255) on error
+    if ($run_command_status) {
+       return $run_command_status;
     }
 
-    if ($runtest) {
-       $ret = run_config_bisect_test $type;
-       if ($ret) {
-           doprint "NEW GOOD CONFIG\n";
-           %good_configs = %new_configs;
-           run_command "mv $good_config ${good_config}.last";
-           save_config \%good_configs, $good_config;
-           %{$pgood} = %good_configs;
-       } else {
-           doprint "NEW BAD CONFIG\n";
-           %bad_configs = %new_configs;
-           run_command "mv $bad_config ${bad_config}.last";
-           save_config \%bad_configs, $bad_config;
-           %{$pbad} = %bad_configs;
-       }
-       return 0;
+    $ret = run_config_bisect_test $config_bisect_type;
+    if ($ret) {
+        doprint "NEW GOOD CONFIG\n";
+       # Return 3 for good config
+       return 3;
+    } else {
+        doprint "NEW BAD CONFIG\n";
+       # Return 4 for bad config
+       return 4;
     }
-
-    fail "Hmm, need to do a mix match?\n";
-    return -1;
 }
 
 sub config_bisect {
     my ($i) = @_;
 
+    my $good_config;
+    my $bad_config;
+
     my $type = $config_bisect_type;
     my $ret;
 
@@ -3353,6 +3225,24 @@ sub config_bisect {
        $good_config = $output_config;
     }
 
+    if (!defined($config_bisect_exec)) {
+       # First check the location that ktest.pl ran
+       my @locations = ( "$pwd/config-bisect.pl",
+                         "$dirname/config-bisect.pl",
+                         "$builddir/tools/testing/ktest/config-bisect.pl",
+                         undef );
+       foreach my $loc (@locations) {
+           doprint "loc = $loc\n";
+           $config_bisect_exec = $loc;
+           last if (defined($config_bisect_exec && -x $config_bisect_exec));
+       }
+       if (!defined($config_bisect_exec)) {
+           fail "Could not find an executable config-bisect.pl\n",
+               "  Set CONFIG_BISECT_EXEC to point to config-bisect.pl";
+           return 1;
+       }
+    }
+
     # we don't want min configs to cause issues here.
     doprint "Disabling 'MIN_CONFIG' for this test\n";
     undef $minconfig;
@@ -3361,21 +3251,31 @@ sub config_bisect {
     my %bad_configs;
     my %tmp_configs;
 
+    if (-f "$tmpdir/good_config.tmp" || -f "$tmpdir/bad_config.tmp") {
+       if (read_yn "Interrupted config-bisect. Continue (n - will start new)?") {
+           if (-f "$tmpdir/good_config.tmp") {
+               $good_config = "$tmpdir/good_config.tmp";
+           } else {
+               $good_config = "$tmpdir/good_config";
+           }
+           if (-f "$tmpdir/bad_config.tmp") {
+               $bad_config = "$tmpdir/bad_config.tmp";
+           } else {
+               $bad_config = "$tmpdir/bad_config";
+           }
+       }
+    }
     doprint "Run good configs through make oldconfig\n";
     assign_configs \%tmp_configs, $good_config;
     create_config "$good_config", \%tmp_configs;
-    assign_configs \%good_configs, $output_config;
+    $good_config = "$tmpdir/good_config";
+    system("cp $output_config $good_config") == 0 or dodie "cp good config";
 
     doprint "Run bad configs through make oldconfig\n";
     assign_configs \%tmp_configs, $bad_config;
     create_config "$bad_config", \%tmp_configs;
-    assign_configs \%bad_configs, $output_config;
-
-    $good_config = "$tmpdir/good_config";
     $bad_config = "$tmpdir/bad_config";
-
-    save_config \%good_configs, $good_config;
-    save_config \%bad_configs, $bad_config;
+    system("cp $output_config $bad_config") == 0 or dodie "cp bad config";
 
     if (defined($config_bisect_check) && $config_bisect_check ne "0") {
        if ($config_bisect_check ne "good") {
@@ -3398,10 +3298,21 @@ sub config_bisect {
        }
     }
 
+    my $last_run = "";
+
     do {
-       $ret = run_config_bisect \%good_configs, \%bad_configs;
+       $ret = run_config_bisect $good_config, $bad_config, $last_run;
+       if ($ret == 3) {
+           $last_run = "good";
+       } elsif ($ret == 4) {
+           $last_run = "bad";
+       }
        print_times;
-    } while (!$ret);
+    } while ($ret == 3 || $ret == 4);
+
+    if ($ret == 2) {
+        config_bisect_end "$good_config.tmp", "$bad_config.tmp";
+    }
 
     return $ret if ($ret < 0);
 
@@ -3416,9 +3327,9 @@ sub patchcheck_reboot {
 sub patchcheck {
     my ($i) = @_;
 
-    die "PATCHCHECK_START[$i] not defined\n"
+    dodie "PATCHCHECK_START[$i] not defined\n"
        if (!defined($patchcheck_start));
-    die "PATCHCHECK_TYPE[$i] not defined\n"
+    dodie "PATCHCHECK_TYPE[$i] not defined\n"
        if (!defined($patchcheck_type));
 
     my $start = $patchcheck_start;
@@ -3432,7 +3343,7 @@ sub patchcheck {
     if (defined($patchcheck_end)) {
        $end = $patchcheck_end;
     } elsif ($cherry) {
-       die "PATCHCHECK_END must be defined with PATCHCHECK_CHERRY\n";
+       dodie "PATCHCHECK_END must be defined with PATCHCHECK_CHERRY\n";
     }
 
     # Get the true sha1's since we can use things like HEAD~3
@@ -3496,7 +3407,7 @@ sub patchcheck {
        doprint "\nProcessing commit \"$item\"\n\n";
 
        run_command "git checkout $sha1" or
-           die "Failed to checkout $sha1";
+           dodie "Failed to checkout $sha1";
 
        # only clean on the first and last patch
        if ($item eq $list[0] ||
@@ -3587,7 +3498,7 @@ sub read_kconfig {
     }
 
     open(KIN, "$kconfig")
-       or die "Can't open $kconfig";
+       or dodie "Can't open $kconfig";
     while (<KIN>) {
        chomp;
 
@@ -3746,7 +3657,7 @@ sub get_depends {
 
            $dep =~ s/^[^$valid]*[$valid]+//;
        } else {
-           die "this should never happen";
+           dodie "this should never happen";
        }
     }
 
@@ -4007,7 +3918,7 @@ sub make_min_config {
            # update new ignore configs
            if (defined($ignore_config)) {
                open (OUT, ">$temp_config")
-                   or die "Can't write to $temp_config";
+                   or dodie "Can't write to $temp_config";
                foreach my $config (keys %save_configs) {
                    print OUT "$save_configs{$config}\n";
                }
@@ -4035,7 +3946,7 @@ sub make_min_config {
 
            # Save off all the current mandatory configs
            open (OUT, ">$temp_config")
-               or die "Can't write to $temp_config";
+               or dodie "Can't write to $temp_config";
            foreach my $config (keys %keep_configs) {
                print OUT "$keep_configs{$config}\n";
            }
@@ -4222,6 +4133,74 @@ sub set_test_option {
     return eval_option($name, $option, $i);
 }
 
+sub find_mailer {
+    my ($mailer) = @_;
+
+    my @paths = split /:/, $ENV{PATH};
+
+    # sendmail is usually in /usr/sbin
+    $paths[$#paths + 1] = "/usr/sbin";
+
+    foreach my $path (@paths) {
+       if (-x "$path/$mailer") {
+           return $path;
+       }
+    }
+
+    return undef;
+}
+
+sub do_send_mail {
+    my ($subject, $message) = @_;
+
+    if (!defined($mail_path)) {
+       # find the mailer
+       $mail_path = find_mailer $mailer;
+       if (!defined($mail_path)) {
+           die "\nCan not find $mailer in PATH\n";
+       }
+    }
+
+    if (!defined($mail_command)) {
+       if ($mailer eq "mail" || $mailer eq "mailx") {
+           $mail_command = "\$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO <<< \'\$MESSAGE\'";
+       } elsif ($mailer eq "sendmail" ) {
+           $mail_command =  "echo \'Subject: \$SUBJECT\n\n\$MESSAGE\' | \$MAIL_PATH/\$MAILER -t \$MAILTO";
+       } else {
+           die "\nYour mailer: $mailer is not supported.\n";
+       }
+    }
+
+    $mail_command =~ s/\$MAILER/$mailer/g;
+    $mail_command =~ s/\$MAIL_PATH/$mail_path/g;
+    $mail_command =~ s/\$MAILTO/$mailto/g;
+    $mail_command =~ s/\$SUBJECT/$subject/g;
+    $mail_command =~ s/\$MESSAGE/$message/g;
+
+    run_command $mail_command;
+}
+
+sub send_email {
+
+    if (defined($mailto)) {
+       if (!defined($mailer)) {
+           doprint "No email sent: email or mailer not specified in config.\n";
+           return;
+       }
+       do_send_mail @_;
+    }
+}
+
+sub cancel_test {
+    if ($email_when_canceled) {
+        send_email("KTEST: Your [$test_type] test was cancelled",
+                "Your test started at $script_start_time was cancelled: sig int");
+    }
+    die "\nCaught Sig Int, test interrupted: $!\n"
+}
+
+$SIG{INT} = qw(cancel_test);
+
 # First we need to do is the builds
 for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
 
@@ -4245,11 +4224,11 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
     $outputdir = set_test_option("OUTPUT_DIR", $i);
     $builddir = set_test_option("BUILD_DIR", $i);
 
-    chdir $builddir || die "can't change directory to $builddir";
+    chdir $builddir || dodie "can't change directory to $builddir";
 
     if (!-d $outputdir) {
        mkpath($outputdir) or
-           die "can't create $outputdir";
+           dodie "can't create $outputdir";
     }
 
     $make = "$makecmd O=$outputdir";
@@ -4262,9 +4241,15 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
     $start_minconfig_defined = 1;
 
     # The first test may override the PRE_KTEST option
-    if (defined($pre_ktest) && $i == 1) {
-       doprint "\n";
-       run_command $pre_ktest;
+    if ($i == 1) {
+        if (defined($pre_ktest)) {
+            doprint "\n";
+            run_command $pre_ktest;
+        }
+        if ($email_when_started) {
+            send_email("KTEST: Your [$test_type] test was started",
+                "Your test was started on $script_start_time");
+        }
     }
 
     # Any test can override the POST_KTEST option
@@ -4280,7 +4265,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
 
     if (!-d $tmpdir) {
        mkpath($tmpdir) or
-           die "can't create $tmpdir";
+           dodie "can't create $tmpdir";
     }
 
     $ENV{"SSH_USER"} = $ssh_user;
@@ -4353,7 +4338,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
 
     if (defined($checkout)) {
        run_command "git checkout $checkout" or
-           die "failed to checkout $checkout";
+           dodie "failed to checkout $checkout";
     }
 
     $no_reboot = 0;
@@ -4428,4 +4413,8 @@ if ($opt{"POWEROFF_ON_SUCCESS"}) {
 
 doprint "\n    $successes of $opt{NUM_TESTS} tests were successful\n\n";
 
+if ($email_when_finished) {
+    send_email("KTEST: Your [$test_type] test has finished!",
+            "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!");
+}
 exit 0;
index 6c58cd8bbbae71ad86592b4627be483879374a9c..6ca6ca0ce695a25d907bde88e8596a3e5b056a52 100644 (file)
@@ -1,6 +1,11 @@
 #
 # Config file for ktest.pl
 #
+# Place your customized version of this, in the working directory that
+# ktest.pl is run from. By default, ktest.pl will look for a file
+# called "ktest.conf", but you can name it anything you like and specify
+# the name of your config file as the first argument of ktest.pl.
+#
 # Note, all paths must be absolute
 #
 
 
 #### Optional Config Options (all have defaults) ####
 
+# Email options for receiving notifications. Users must setup
+# the specified mailer prior to using this feature.
+#
+# (default undefined)
+#MAILTO =
+#
+# Supported mailers: sendmail, mail, mailx
+# (default sendmail)
+#MAILER = sendmail
+#
+# The executable to run
+# (default: for sendmail "/usr/sbin/sendmail", otherwise equals ${MAILER})
+#MAIL_EXEC = /usr/sbin/sendmail
+#
+# The command used to send mail, which uses the above options
+# can be modified. By default if the mailer is "sendmail" then
+#  MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO
+# For mail or mailx:
+#  MAIL_COMMAND = $MAIL_PATH/$MAILER -s \'$SUBJECT\' $MAILTO <<< \'$MESSAGE\'
+# ktest.pl will do the substitution for MAIL_PATH, MAILER, MAILTO at the time
+#    it sends the mail if "$FOO" format is used. If "${FOO}" format is used,
+#    then the substitutions will occur at the time the config file is read.
+#    But note, MAIL_PATH and MAILER require being set by the config file if
+#     ${MAIL_PATH} or ${MAILER} are used, but not if $MAIL_PATH or $MAILER are.
+#MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO
+#
+# Errors are defined as those that would terminate the script
+# (default 1)
+#EMAIL_ON_ERROR = 1
+# (default 1)
+#EMAIL_WHEN_FINISHED = 1
+# (default 0)
+#EMAIL_WHEN_STARTED = 1
+#
+# Users can cancel the test by Ctrl^C
+# (default 0)
+#EMAIL_WHEN_CANCELED = 1
+
 # Start a test setup. If you leave this off, all options
 # will be default and the test will run once.
 # This is a label and not really an option (it takes no value).
 # (default 120)
 #TIMEOUT = 120
 
+# The timeout in seconds when to test if the box can be rebooted
+# or not. Before issuing the reboot command, a ssh connection
+# is attempted to see if the target machine is still active.
+# If the target does not connect within this timeout, a power cycle
+# is issued instead of a reboot.
+# CONNECT_TIMEOUT = 25
+
 # In between tests, a reboot of the box may occur, and this
 # is the time to wait for the console after it stops producing
 # output. Some machines may not produce a large lag on reboot
 #  Set it to "good" to test only the good config and set it
 #  to "bad" to only test the bad config.
 #
+# CONFIG_BISECT_EXEC (optional)
+#  The config bisect is a separate program that comes with ktest.pl.
+#  By default, it will look for:
+#    `pwd`/config-bisect.pl # the location ktest.pl was executed from.
+#  If it does not find it there, it will look for:
+#    `dirname <ktest.pl>`/config-bisect.pl # The directory that holds ktest.pl
+#  If it does not find it there, it will look for:
+#    ${BUILD_DIR}/tools/testing/ktest/config-bisect.pl
+#  Setting CONFIG_BISECT_EXEC will override where it looks.
+#
 # Example:
 #   TEST_START
 #   TEST_TYPE = config_bisect
index 620fa78b3b1b33ab7e87f20e076bc6323b68e41f..cb166be4918d7713f801e95204559c0abd9e8d62 100644 (file)
@@ -104,7 +104,8 @@ enum {
        NUM_HINTS = 8,
        NUM_BDW = NUM_DCR,
        NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW,
-       NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */,
+       NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */
+               + 4 /* spa1 iset */ + 1 /* spa11 iset */,
        DIMM_SIZE = SZ_32M,
        LABEL_SIZE = SZ_128K,
        SPA_VCD_SIZE = SZ_4M,
@@ -153,6 +154,7 @@ struct nfit_test {
        void *nfit_buf;
        dma_addr_t nfit_dma;
        size_t nfit_size;
+       size_t nfit_filled;
        int dcr_idx;
        int num_dcr;
        int num_pm;
@@ -709,7 +711,9 @@ static void smart_notify(struct device *bus_dev,
                                >= thresh->media_temperature)
                        || ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP)
                                && smart->ctrl_temperature
-                               >= thresh->ctrl_temperature)) {
+                               >= thresh->ctrl_temperature)
+                       || (smart->health != ND_INTEL_SMART_NON_CRITICAL_HEALTH)
+                       || (smart->shutdown_state != 0)) {
                device_lock(bus_dev);
                __acpi_nvdimm_notify(dimm_dev, 0x81);
                device_unlock(bus_dev);
@@ -735,6 +739,32 @@ static int nfit_test_cmd_smart_set_threshold(
        return 0;
 }
 
+static int nfit_test_cmd_smart_inject(
+               struct nd_intel_smart_inject *inj,
+               unsigned int buf_len,
+               struct nd_intel_smart_threshold *thresh,
+               struct nd_intel_smart *smart,
+               struct device *bus_dev, struct device *dimm_dev)
+{
+       if (buf_len != sizeof(*inj))
+               return -EINVAL;
+
+       if (inj->mtemp_enable)
+               smart->media_temperature = inj->media_temperature;
+       if (inj->spare_enable)
+               smart->spares = inj->spares;
+       if (inj->fatal_enable)
+               smart->health = ND_INTEL_SMART_FATAL_HEALTH;
+       if (inj->unsafe_shutdown_enable) {
+               smart->shutdown_state = 1;
+               smart->shutdown_count++;
+       }
+       inj->status = 0;
+       smart_notify(bus_dev, dimm_dev, smart, thresh);
+
+       return 0;
+}
+
 static void uc_error_notify(struct work_struct *work)
 {
        struct nfit_test *t = container_of(work, typeof(*t), work);
@@ -935,6 +965,13 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
                                                        t->dcr_idx],
                                                &t->smart[i - t->dcr_idx],
                                                &t->pdev.dev, t->dimm_dev[i]);
+                       case ND_INTEL_SMART_INJECT:
+                               return nfit_test_cmd_smart_inject(buf,
+                                               buf_len,
+                                               &t->smart_threshold[i -
+                                                       t->dcr_idx],
+                                               &t->smart[i - t->dcr_idx],
+                                               &t->pdev.dev, t->dimm_dev[i]);
                        default:
                                return -ENOTTY;
                        }
@@ -1222,7 +1259,7 @@ static void smart_init(struct nfit_test *t)
                        | ND_INTEL_SMART_MTEMP_VALID,
                .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
                .media_temperature = 23 * 16,
-               .ctrl_temperature = 30 * 16,
+               .ctrl_temperature = 25 * 16,
                .pmic_temperature = 40 * 16,
                .spares = 75,
                .alarm_flags = ND_INTEL_SMART_SPARE_TRIP
@@ -1366,7 +1403,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        struct acpi_nfit_data_region *bdw;
        struct acpi_nfit_flush_address *flush;
        struct acpi_nfit_capabilities *pcap;
-       unsigned int offset, i;
+       unsigned int offset = 0, i;
 
        /*
         * spa0 (interleave first half of dimm0 and dimm1, note storage
@@ -1380,93 +1417,102 @@ static void nfit_test0_setup(struct nfit_test *t)
        spa->range_index = 0+1;
        spa->address = t->spa_set_dma[0];
        spa->length = SPA0_SIZE;
+       offset += spa->header.length;
 
        /*
         * spa1 (interleave last half of the 4 DIMMS, note storage
         * does not actually alias the related block-data-window
         * regions)
         */
-       spa = nfit_buf + sizeof(*spa);
+       spa = nfit_buf + offset;
        spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
        spa->header.length = sizeof(*spa);
        memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
        spa->range_index = 1+1;
        spa->address = t->spa_set_dma[1];
        spa->length = SPA1_SIZE;
+       offset += spa->header.length;
 
        /* spa2 (dcr0) dimm0 */
-       spa = nfit_buf + sizeof(*spa) * 2;
+       spa = nfit_buf + offset;
        spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
        spa->header.length = sizeof(*spa);
        memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
        spa->range_index = 2+1;
        spa->address = t->dcr_dma[0];
        spa->length = DCR_SIZE;
+       offset += spa->header.length;
 
        /* spa3 (dcr1) dimm1 */
-       spa = nfit_buf + sizeof(*spa) * 3;
+       spa = nfit_buf + offset;
        spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
        spa->header.length = sizeof(*spa);
        memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
        spa->range_index = 3+1;
        spa->address = t->dcr_dma[1];
        spa->length = DCR_SIZE;
+       offset += spa->header.length;
 
        /* spa4 (dcr2) dimm2 */
-       spa = nfit_buf + sizeof(*spa) * 4;
+       spa = nfit_buf + offset;
        spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
        spa->header.length = sizeof(*spa);
        memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
        spa->range_index = 4+1;
        spa->address = t->dcr_dma[2];
        spa->length = DCR_SIZE;
+       offset += spa->header.length;
 
        /* spa5 (dcr3) dimm3 */
-       spa = nfit_buf + sizeof(*spa) * 5;
+       spa = nfit_buf + offset;
        spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
        spa->header.length = sizeof(*spa);
        memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
        spa->range_index = 5+1;
        spa->address = t->dcr_dma[3];
        spa->length = DCR_SIZE;
+       offset += spa->header.length;
 
        /* spa6 (bdw for dcr0) dimm0 */
-       spa = nfit_buf + sizeof(*spa) * 6;
+       spa = nfit_buf + offset;
        spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
        spa->header.length = sizeof(*spa);
        memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
        spa->range_index = 6+1;
        spa->address = t->dimm_dma[0];
        spa->length = DIMM_SIZE;
+       offset += spa->header.length;
 
        /* spa7 (bdw for dcr1) dimm1 */
-       spa = nfit_buf + sizeof(*spa) * 7;
+       spa = nfit_buf + offset;
        spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
        spa->header.length = sizeof(*spa);
        memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
        spa->range_index = 7+1;
        spa->address = t->dimm_dma[1];
        spa->length = DIMM_SIZE;
+       offset += spa->header.length;
 
        /* spa8 (bdw for dcr2) dimm2 */
-       spa = nfit_buf + sizeof(*spa) * 8;
+       spa = nfit_buf + offset;
        spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
        spa->header.length = sizeof(*spa);
        memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
        spa->range_index = 8+1;
        spa->address = t->dimm_dma[2];
        spa->length = DIMM_SIZE;
+       offset += spa->header.length;
 
        /* spa9 (bdw for dcr3) dimm3 */
-       spa = nfit_buf + sizeof(*spa) * 9;
+       spa = nfit_buf + offset;
        spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
        spa->header.length = sizeof(*spa);
        memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
        spa->range_index = 9+1;
        spa->address = t->dimm_dma[3];
        spa->length = DIMM_SIZE;
+       offset += spa->header.length;
 
-       offset = sizeof(*spa) * 10;
        /* mem-region0 (spa0, dimm0) */
        memdev = nfit_buf + offset;
        memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -1481,9 +1527,10 @@ static void nfit_test0_setup(struct nfit_test *t)
        memdev->address = 0;
        memdev->interleave_index = 0;
        memdev->interleave_ways = 2;
+       offset += memdev->header.length;
 
        /* mem-region1 (spa0, dimm1) */
-       memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map);
+       memdev = nfit_buf + offset;
        memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
        memdev->header.length = sizeof(*memdev);
        memdev->device_handle = handle[1];
@@ -1497,9 +1544,10 @@ static void nfit_test0_setup(struct nfit_test *t)
        memdev->interleave_index = 0;
        memdev->interleave_ways = 2;
        memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+       offset += memdev->header.length;
 
        /* mem-region2 (spa1, dimm0) */
-       memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2;
+       memdev = nfit_buf + offset;
        memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
        memdev->header.length = sizeof(*memdev);
        memdev->device_handle = handle[0];
@@ -1513,9 +1561,10 @@ static void nfit_test0_setup(struct nfit_test *t)
        memdev->interleave_index = 0;
        memdev->interleave_ways = 4;
        memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+       offset += memdev->header.length;
 
        /* mem-region3 (spa1, dimm1) */
-       memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3;
+       memdev = nfit_buf + offset;
        memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
        memdev->header.length = sizeof(*memdev);
        memdev->device_handle = handle[1];
@@ -1528,9 +1577,10 @@ static void nfit_test0_setup(struct nfit_test *t)
        memdev->address = SPA0_SIZE/2;
        memdev->interleave_index = 0;
        memdev->interleave_ways = 4;
+       offset += memdev->header.length;
 
        /* mem-region4 (spa1, dimm2) */
-       memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 4;
+       memdev = nfit_buf + offset;
        memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
        memdev->header.length = sizeof(*memdev);
        memdev->device_handle = handle[2];
@@ -1544,9 +1594,10 @@ static void nfit_test0_setup(struct nfit_test *t)
        memdev->interleave_index = 0;
        memdev->interleave_ways = 4;
        memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+       offset += memdev->header.length;
 
        /* mem-region5 (spa1, dimm3) */
-       memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5;
+       memdev = nfit_buf + offset;
        memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
        memdev->header.length = sizeof(*memdev);
        memdev->device_handle = handle[3];
@@ -1559,9 +1610,10 @@ static void nfit_test0_setup(struct nfit_test *t)
        memdev->address = SPA0_SIZE/2;
        memdev->interleave_index = 0;
        memdev->interleave_ways = 4;
+       offset += memdev->header.length;
 
        /* mem-region6 (spa/dcr0, dimm0) */
-       memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 6;
+       memdev = nfit_buf + offset;
        memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
        memdev->header.length = sizeof(*memdev);
        memdev->device_handle = handle[0];
@@ -1574,9 +1626,10 @@ static void nfit_test0_setup(struct nfit_test *t)
        memdev->address = 0;
        memdev->interleave_index = 0;
        memdev->interleave_ways = 1;
+       offset += memdev->header.length;
 
        /* mem-region7 (spa/dcr1, dimm1) */
-       memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 7;
+       memdev = nfit_buf + offset;
        memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
        memdev->header.length = sizeof(*memdev);
        memdev->device_handle = handle[1];
@@ -1589,9 +1642,10 @@ static void nfit_test0_setup(struct nfit_test *t)
        memdev->address = 0;
        memdev->interleave_index = 0;
        memdev->interleave_ways = 1;
+       offset += memdev->header.length;
 
        /* mem-region8 (spa/dcr2, dimm2) */
-       memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 8;
+       memdev = nfit_buf + offset;
        memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
        memdev->header.length = sizeof(*memdev);
        memdev->device_handle = handle[2];
@@ -1604,9 +1658,10 @@ static void nfit_test0_setup(struct nfit_test *t)
        memdev->address = 0;
        memdev->interleave_index = 0;
        memdev->interleave_ways = 1;
+       offset += memdev->header.length;
 
        /* mem-region9 (spa/dcr3, dimm3) */
-       memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 9;
+       memdev = nfit_buf + offset;
        memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
        memdev->header.length = sizeof(*memdev);
        memdev->device_handle = handle[3];
@@ -1619,9 +1674,10 @@ static void nfit_test0_setup(struct nfit_test *t)
        memdev->address = 0;
        memdev->interleave_index = 0;
        memdev->interleave_ways = 1;
+       offset += memdev->header.length;
 
        /* mem-region10 (spa/bdw0, dimm0) */
-       memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 10;
+       memdev = nfit_buf + offset;
        memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
        memdev->header.length = sizeof(*memdev);
        memdev->device_handle = handle[0];
@@ -1634,9 +1690,10 @@ static void nfit_test0_setup(struct nfit_test *t)
        memdev->address = 0;
        memdev->interleave_index = 0;
        memdev->interleave_ways = 1;
+       offset += memdev->header.length;
 
        /* mem-region11 (spa/bdw1, dimm1) */
-       memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 11;
+       memdev = nfit_buf + offset;
        memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
        memdev->header.length = sizeof(*memdev);
        memdev->device_handle = handle[1];
@@ -1649,9 +1706,10 @@ static void nfit_test0_setup(struct nfit_test *t)
        memdev->address = 0;
        memdev->interleave_index = 0;
        memdev->interleave_ways = 1;
+       offset += memdev->header.length;
 
        /* mem-region12 (spa/bdw2, dimm2) */
-       memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 12;
+       memdev = nfit_buf + offset;
        memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
        memdev->header.length = sizeof(*memdev);
        memdev->device_handle = handle[2];
@@ -1664,9 +1722,10 @@ static void nfit_test0_setup(struct nfit_test *t)
        memdev->address = 0;
        memdev->interleave_index = 0;
        memdev->interleave_ways = 1;
+       offset += memdev->header.length;
 
        /* mem-region13 (spa/dcr3, dimm3) */
-       memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 13;
+       memdev = nfit_buf + offset;
        memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
        memdev->header.length = sizeof(*memdev);
        memdev->device_handle = handle[3];
@@ -1680,12 +1739,12 @@ static void nfit_test0_setup(struct nfit_test *t)
        memdev->interleave_index = 0;
        memdev->interleave_ways = 1;
        memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+       offset += memdev->header.length;
 
-       offset = offset + sizeof(struct acpi_nfit_memory_map) * 14;
        /* dcr-descriptor0: blk */
        dcr = nfit_buf + offset;
        dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-       dcr->header.length = sizeof(struct acpi_nfit_control_region);
+       dcr->header.length = sizeof(*dcr);
        dcr->region_index = 0+1;
        dcr_common_init(dcr);
        dcr->serial_number = ~handle[0];
@@ -1696,11 +1755,12 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->command_size = 8;
        dcr->status_offset = 8;
        dcr->status_size = 4;
+       offset += dcr->header.length;
 
        /* dcr-descriptor1: blk */
-       dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region);
+       dcr = nfit_buf + offset;
        dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-       dcr->header.length = sizeof(struct acpi_nfit_control_region);
+       dcr->header.length = sizeof(*dcr);
        dcr->region_index = 1+1;
        dcr_common_init(dcr);
        dcr->serial_number = ~handle[1];
@@ -1711,11 +1771,12 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->command_size = 8;
        dcr->status_offset = 8;
        dcr->status_size = 4;
+       offset += dcr->header.length;
 
        /* dcr-descriptor2: blk */
-       dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 2;
+       dcr = nfit_buf + offset;
        dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-       dcr->header.length = sizeof(struct acpi_nfit_control_region);
+       dcr->header.length = sizeof(*dcr);
        dcr->region_index = 2+1;
        dcr_common_init(dcr);
        dcr->serial_number = ~handle[2];
@@ -1726,11 +1787,12 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->command_size = 8;
        dcr->status_offset = 8;
        dcr->status_size = 4;
+       offset += dcr->header.length;
 
        /* dcr-descriptor3: blk */
-       dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 3;
+       dcr = nfit_buf + offset;
        dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-       dcr->header.length = sizeof(struct acpi_nfit_control_region);
+       dcr->header.length = sizeof(*dcr);
        dcr->region_index = 3+1;
        dcr_common_init(dcr);
        dcr->serial_number = ~handle[3];
@@ -1741,8 +1803,8 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->command_size = 8;
        dcr->status_offset = 8;
        dcr->status_size = 4;
+       offset += dcr->header.length;
 
-       offset = offset + sizeof(struct acpi_nfit_control_region) * 4;
        /* dcr-descriptor0: pmem */
        dcr = nfit_buf + offset;
        dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -1753,10 +1815,10 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->serial_number = ~handle[0];
        dcr->code = NFIT_FIC_BYTEN;
        dcr->windows = 0;
+       offset += dcr->header.length;
 
        /* dcr-descriptor1: pmem */
-       dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region,
-                       window_size);
+       dcr = nfit_buf + offset;
        dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
        dcr->header.length = offsetof(struct acpi_nfit_control_region,
                        window_size);
@@ -1765,10 +1827,10 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->serial_number = ~handle[1];
        dcr->code = NFIT_FIC_BYTEN;
        dcr->windows = 0;
+       offset += dcr->header.length;
 
        /* dcr-descriptor2: pmem */
-       dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region,
-                       window_size) * 2;
+       dcr = nfit_buf + offset;
        dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
        dcr->header.length = offsetof(struct acpi_nfit_control_region,
                        window_size);
@@ -1777,10 +1839,10 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->serial_number = ~handle[2];
        dcr->code = NFIT_FIC_BYTEN;
        dcr->windows = 0;
+       offset += dcr->header.length;
 
        /* dcr-descriptor3: pmem */
-       dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region,
-                       window_size) * 3;
+       dcr = nfit_buf + offset;
        dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
        dcr->header.length = offsetof(struct acpi_nfit_control_region,
                        window_size);
@@ -1789,54 +1851,56 @@ static void nfit_test0_setup(struct nfit_test *t)
        dcr->serial_number = ~handle[3];
        dcr->code = NFIT_FIC_BYTEN;
        dcr->windows = 0;
+       offset += dcr->header.length;
 
-       offset = offset + offsetof(struct acpi_nfit_control_region,
-                       window_size) * 4;
        /* bdw0 (spa/dcr0, dimm0) */
        bdw = nfit_buf + offset;
        bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-       bdw->header.length = sizeof(struct acpi_nfit_data_region);
+       bdw->header.length = sizeof(*bdw);
        bdw->region_index = 0+1;
        bdw->windows = 1;
        bdw->offset = 0;
        bdw->size = BDW_SIZE;
        bdw->capacity = DIMM_SIZE;
        bdw->start_address = 0;
+       offset += bdw->header.length;
 
        /* bdw1 (spa/dcr1, dimm1) */
-       bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region);
+       bdw = nfit_buf + offset;
        bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-       bdw->header.length = sizeof(struct acpi_nfit_data_region);
+       bdw->header.length = sizeof(*bdw);
        bdw->region_index = 1+1;
        bdw->windows = 1;
        bdw->offset = 0;
        bdw->size = BDW_SIZE;
        bdw->capacity = DIMM_SIZE;
        bdw->start_address = 0;
+       offset += bdw->header.length;
 
        /* bdw2 (spa/dcr2, dimm2) */
-       bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 2;
+       bdw = nfit_buf + offset;
        bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-       bdw->header.length = sizeof(struct acpi_nfit_data_region);
+       bdw->header.length = sizeof(*bdw);
        bdw->region_index = 2+1;
        bdw->windows = 1;
        bdw->offset = 0;
        bdw->size = BDW_SIZE;
        bdw->capacity = DIMM_SIZE;
        bdw->start_address = 0;
+       offset += bdw->header.length;
 
        /* bdw3 (spa/dcr3, dimm3) */
-       bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 3;
+       bdw = nfit_buf + offset;
        bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-       bdw->header.length = sizeof(struct acpi_nfit_data_region);
+       bdw->header.length = sizeof(*bdw);
        bdw->region_index = 3+1;
        bdw->windows = 1;
        bdw->offset = 0;
        bdw->size = BDW_SIZE;
        bdw->capacity = DIMM_SIZE;
        bdw->start_address = 0;
+       offset += bdw->header.length;
 
-       offset = offset + sizeof(struct acpi_nfit_data_region) * 4;
        /* flush0 (dimm0) */
        flush = nfit_buf + offset;
        flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
@@ -1845,48 +1909,52 @@ static void nfit_test0_setup(struct nfit_test *t)
        flush->hint_count = NUM_HINTS;
        for (i = 0; i < NUM_HINTS; i++)
                flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64);
+       offset += flush->header.length;
 
        /* flush1 (dimm1) */
-       flush = nfit_buf + offset + flush_hint_size * 1;
+       flush = nfit_buf + offset;
        flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
        flush->header.length = flush_hint_size;
        flush->device_handle = handle[1];
        flush->hint_count = NUM_HINTS;
        for (i = 0; i < NUM_HINTS; i++)
                flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64);
+       offset += flush->header.length;
 
        /* flush2 (dimm2) */
-       flush = nfit_buf + offset + flush_hint_size  * 2;
+       flush = nfit_buf + offset;
        flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
        flush->header.length = flush_hint_size;
        flush->device_handle = handle[2];
        flush->hint_count = NUM_HINTS;
        for (i = 0; i < NUM_HINTS; i++)
                flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64);
+       offset += flush->header.length;
 
        /* flush3 (dimm3) */
-       flush = nfit_buf + offset + flush_hint_size * 3;
+       flush = nfit_buf + offset;
        flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
        flush->header.length = flush_hint_size;
        flush->device_handle = handle[3];
        flush->hint_count = NUM_HINTS;
        for (i = 0; i < NUM_HINTS; i++)
                flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64);
+       offset += flush->header.length;
 
        /* platform capabilities */
-       pcap = nfit_buf + offset + flush_hint_size * 4;
+       pcap = nfit_buf + offset;
        pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES;
        pcap->header.length = sizeof(*pcap);
        pcap->highest_capability = 1;
        pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH |
                ACPI_NFIT_CAPABILITY_MEM_FLUSH;
+       offset += pcap->header.length;
 
        if (t->setup_hotplug) {
-               offset = offset + flush_hint_size * 4 + sizeof(*pcap);
                /* dcr-descriptor4: blk */
                dcr = nfit_buf + offset;
                dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-               dcr->header.length = sizeof(struct acpi_nfit_control_region);
+               dcr->header.length = sizeof(*dcr);
                dcr->region_index = 8+1;
                dcr_common_init(dcr);
                dcr->serial_number = ~handle[4];
@@ -1897,8 +1965,8 @@ static void nfit_test0_setup(struct nfit_test *t)
                dcr->command_size = 8;
                dcr->status_offset = 8;
                dcr->status_size = 4;
+               offset += dcr->header.length;
 
-               offset = offset + sizeof(struct acpi_nfit_control_region);
                /* dcr-descriptor4: pmem */
                dcr = nfit_buf + offset;
                dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -1909,21 +1977,20 @@ static void nfit_test0_setup(struct nfit_test *t)
                dcr->serial_number = ~handle[4];
                dcr->code = NFIT_FIC_BYTEN;
                dcr->windows = 0;
+               offset += dcr->header.length;
 
-               offset = offset + offsetof(struct acpi_nfit_control_region,
-                               window_size);
                /* bdw4 (spa/dcr4, dimm4) */
                bdw = nfit_buf + offset;
                bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-               bdw->header.length = sizeof(struct acpi_nfit_data_region);
+               bdw->header.length = sizeof(*bdw);
                bdw->region_index = 8+1;
                bdw->windows = 1;
                bdw->offset = 0;
                bdw->size = BDW_SIZE;
                bdw->capacity = DIMM_SIZE;
                bdw->start_address = 0;
+               offset += bdw->header.length;
 
-               offset = offset + sizeof(struct acpi_nfit_data_region);
                /* spa10 (dcr4) dimm4 */
                spa = nfit_buf + offset;
                spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
@@ -1932,30 +1999,32 @@ static void nfit_test0_setup(struct nfit_test *t)
                spa->range_index = 10+1;
                spa->address = t->dcr_dma[4];
                spa->length = DCR_SIZE;
+               offset += spa->header.length;
 
                /*
                 * spa11 (single-dimm interleave for hotplug, note storage
                 * does not actually alias the related block-data-window
                 * regions)
                 */
-               spa = nfit_buf + offset + sizeof(*spa);
+               spa = nfit_buf + offset;
                spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
                spa->header.length = sizeof(*spa);
                memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
                spa->range_index = 11+1;
                spa->address = t->spa_set_dma[2];
                spa->length = SPA0_SIZE;
+               offset += spa->header.length;
 
                /* spa12 (bdw for dcr4) dimm4 */
-               spa = nfit_buf + offset + sizeof(*spa) * 2;
+               spa = nfit_buf + offset;
                spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
                spa->header.length = sizeof(*spa);
                memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
                spa->range_index = 12+1;
                spa->address = t->dimm_dma[4];
                spa->length = DIMM_SIZE;
+               offset += spa->header.length;
 
-               offset = offset + sizeof(*spa) * 3;
                /* mem-region14 (spa/dcr4, dimm4) */
                memdev = nfit_buf + offset;
                memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -1970,10 +2039,10 @@ static void nfit_test0_setup(struct nfit_test *t)
                memdev->address = 0;
                memdev->interleave_index = 0;
                memdev->interleave_ways = 1;
+               offset += memdev->header.length;
 
-               /* mem-region15 (spa0, dimm4) */
-               memdev = nfit_buf + offset +
-                               sizeof(struct acpi_nfit_memory_map);
+               /* mem-region15 (spa11, dimm4) */
+               memdev = nfit_buf + offset;
                memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
                memdev->header.length = sizeof(*memdev);
                memdev->device_handle = handle[4];
@@ -1987,10 +2056,10 @@ static void nfit_test0_setup(struct nfit_test *t)
                memdev->interleave_index = 0;
                memdev->interleave_ways = 1;
                memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+               offset += memdev->header.length;
 
                /* mem-region16 (spa/bdw4, dimm4) */
-               memdev = nfit_buf + offset +
-                               sizeof(struct acpi_nfit_memory_map) * 2;
+               memdev = nfit_buf + offset;
                memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
                memdev->header.length = sizeof(*memdev);
                memdev->device_handle = handle[4];
@@ -2003,8 +2072,8 @@ static void nfit_test0_setup(struct nfit_test *t)
                memdev->address = 0;
                memdev->interleave_index = 0;
                memdev->interleave_ways = 1;
+               offset += memdev->header.length;
 
-               offset = offset + sizeof(struct acpi_nfit_memory_map) * 3;
                /* flush3 (dimm4) */
                flush = nfit_buf + offset;
                flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
@@ -2014,8 +2083,14 @@ static void nfit_test0_setup(struct nfit_test *t)
                for (i = 0; i < NUM_HINTS; i++)
                        flush->hint_address[i] = t->flush_dma[4]
                                + i * sizeof(u64);
+               offset += flush->header.length;
+
+               /* sanity check to make sure we've filled the buffer */
+               WARN_ON(offset != t->nfit_size);
        }
 
+       t->nfit_filled = offset;
+
        post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
                        SPA0_SIZE);
 
@@ -2026,6 +2101,7 @@ static void nfit_test0_setup(struct nfit_test *t)
        set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en);
        set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
        set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
+       set_bit(ND_INTEL_SMART_INJECT, &acpi_desc->dimm_cmd_force_en);
        set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
        set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
        set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
@@ -2061,17 +2137,18 @@ static void nfit_test1_setup(struct nfit_test *t)
        spa->range_index = 0+1;
        spa->address = t->spa_set_dma[0];
        spa->length = SPA2_SIZE;
+       offset += spa->header.length;
 
        /* virtual cd region */
-       spa = nfit_buf + sizeof(*spa);
+       spa = nfit_buf + offset;
        spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
        spa->header.length = sizeof(*spa);
        memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16);
        spa->range_index = 0;
        spa->address = t->spa_set_dma[1];
        spa->length = SPA_VCD_SIZE;
+       offset += spa->header.length;
 
-       offset += sizeof(*spa) * 2;
        /* mem-region0 (spa0, dimm0) */
        memdev = nfit_buf + offset;
        memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -2089,8 +2166,8 @@ static void nfit_test1_setup(struct nfit_test *t)
        memdev->flags = ACPI_NFIT_MEM_SAVE_FAILED | ACPI_NFIT_MEM_RESTORE_FAILED
                | ACPI_NFIT_MEM_FLUSH_FAILED | ACPI_NFIT_MEM_HEALTH_OBSERVED
                | ACPI_NFIT_MEM_NOT_ARMED;
+       offset += memdev->header.length;
 
-       offset += sizeof(*memdev);
        /* dcr-descriptor0 */
        dcr = nfit_buf + offset;
        dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -2101,8 +2178,8 @@ static void nfit_test1_setup(struct nfit_test *t)
        dcr->serial_number = ~handle[5];
        dcr->code = NFIT_FIC_BYTE;
        dcr->windows = 0;
-
        offset += dcr->header.length;
+
        memdev = nfit_buf + offset;
        memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
        memdev->header.length = sizeof(*memdev);
@@ -2117,9 +2194,9 @@ static void nfit_test1_setup(struct nfit_test *t)
        memdev->interleave_index = 0;
        memdev->interleave_ways = 1;
        memdev->flags = ACPI_NFIT_MEM_MAP_FAILED;
+       offset += memdev->header.length;
 
        /* dcr-descriptor1 */
-       offset += sizeof(*memdev);
        dcr = nfit_buf + offset;
        dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
        dcr->header.length = offsetof(struct acpi_nfit_control_region,
@@ -2129,6 +2206,12 @@ static void nfit_test1_setup(struct nfit_test *t)
        dcr->serial_number = ~handle[6];
        dcr->code = NFIT_FIC_BYTE;
        dcr->windows = 0;
+       offset += dcr->header.length;
+
+       /* sanity check to make sure we've filled the buffer */
+       WARN_ON(offset != t->nfit_size);
+
+       t->nfit_filled = offset;
 
        post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
                        SPA2_SIZE);
@@ -2487,7 +2570,7 @@ static int nfit_test_probe(struct platform_device *pdev)
        nd_desc->ndctl = nfit_test_ctl;
 
        rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
-                       nfit_test->nfit_size);
+                       nfit_test->nfit_filled);
        if (rc)
                return rc;
 
index 428344519cdf72cd32747e3457ab0f50d722fb2d..33752e06ff8d0719568d3c07bc56596819995419 100644 (file)
@@ -93,6 +93,7 @@ struct nd_cmd_ars_err_inj_stat {
 #define ND_INTEL_FW_FINISH_UPDATE      15
 #define ND_INTEL_FW_FINISH_QUERY       16
 #define ND_INTEL_SMART_SET_THRESHOLD   17
+#define ND_INTEL_SMART_INJECT          18
 
 #define ND_INTEL_SMART_HEALTH_VALID             (1 << 0)
 #define ND_INTEL_SMART_SPARES_VALID             (1 << 1)
@@ -111,6 +112,10 @@ struct nd_cmd_ars_err_inj_stat {
 #define ND_INTEL_SMART_NON_CRITICAL_HEALTH      (1 << 0)
 #define ND_INTEL_SMART_CRITICAL_HEALTH          (1 << 1)
 #define ND_INTEL_SMART_FATAL_HEALTH             (1 << 2)
+#define ND_INTEL_SMART_INJECT_MTEMP            (1 << 0)
+#define ND_INTEL_SMART_INJECT_SPARE            (1 << 1)
+#define ND_INTEL_SMART_INJECT_FATAL            (1 << 2)
+#define ND_INTEL_SMART_INJECT_SHUTDOWN         (1 << 3)
 
 struct nd_intel_smart {
        __u32 status;
@@ -158,6 +163,17 @@ struct nd_intel_smart_set_threshold {
        __u32 status;
 } __packed;
 
+struct nd_intel_smart_inject {
+       __u64 flags;
+       __u8 mtemp_enable;
+       __u16 media_temperature;
+       __u8 spare_enable;
+       __u8 spares;
+       __u8 fatal_enable;
+       __u8 unsafe_shutdown_enable;
+       __u32 status;
+} __packed;
+
 #define INTEL_FW_STORAGE_SIZE          0x100000
 #define INTEL_FW_MAX_SEND_LEN          0xFFEC
 #define INTEL_FW_QUERY_INTERVAL                250000
index e3201ccf54c3c46dc7ffaf9bc6185edd406a6b95..32159c08a52e5bb220792b9e7652742885f2960d 100644 (file)
@@ -19,6 +19,7 @@
 
 #define __GFP_RECLAIM  (__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM)
 
+#define GFP_ZONEMASK   0x0fu
 #define GFP_ATOMIC     (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
 #define GFP_KERNEL     (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
 #define GFP_NOWAIT     (__GFP_KSWAPD_RECLAIM)
index 2fc410bc4f331499279f1e2b909693d9eda2aefa..32aafa92074c5049fa5bd5fbc08432d3603278dc 100644 (file)
@@ -25,6 +25,7 @@ TARGETS += mqueue
 TARGETS += net
 TARGETS += nsfs
 TARGETS += powerpc
+TARGETS += proc
 TARGETS += pstore
 TARGETS += ptrace
 TARGETS += seccomp
index df3dd7fe5f9b2f9a2de3fcd768fcdf5c90990fa7..2a4f16fc9819dad0cb39039d1947231d98a031db 100644 (file)
@@ -59,6 +59,13 @@ disable_events() {
     echo 0 > events/enable
 }
 
+clear_synthetic_events() { # reset all current synthetic events
+    grep -v ^# synthetic_events |
+    while read line; do
+        echo "!$line" >> synthetic_events
+    done
+}
+
 initialize_ftrace() { # Reset ftrace to initial-state
 # As the initial state, ftrace will be set to nop tracer,
 # no events, no triggers, no filters, no function filters,
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
new file mode 100644 (file)
index 0000000..786dce7
--- /dev/null
@@ -0,0 +1,39 @@
+#!/bin/sh
+# description: event trigger - test extended error support
+
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test extended error support"
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger &>/dev/null
+if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then
+    fail "Failed to generate extended error in histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
new file mode 100644 (file)
index 0000000..7fd5b4a
--- /dev/null
@@ -0,0 +1,54 @@
+#!/bin/sh
+# description: event trigger - test field variable support
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test field variable support"
+
+echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events
+echo 'hist:keys=comm:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger
+echo 'hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
+echo 'hist:keys=pid,prio,comm:vals=lat:sort=pid,prio' > events/synthetic/wakeup_latency/trigger
+
+ping localhost -c 3
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
+    fail "Failed to create inter-event histogram"
+fi
+
+if ! grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
+    fail "Failed to create histogram with field variable"
+fi
+
+echo '!hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+
+if grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
+    fail "Failed to remove histogram with field variable"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
new file mode 100644 (file)
index 0000000..c93dbe3
--- /dev/null
@@ -0,0 +1,58 @@
+#!/bin/sh
+# description: event trigger - test inter-event combined histogram trigger
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+clear_synthetic_events
+
+echo "Test create synthetic event"
+
+echo 'waking_latency  u64 lat pid_t pid' > synthetic_events
+if [ ! -d events/synthetic/waking_latency ]; then
+    fail "Failed to create waking_latency synthetic event"
+fi
+
+echo "Test combined histogram"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger
+echo 'hist:keys=pid:waking_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).waking_latency($waking_lat,pid) if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' > events/synthetic/waking_latency/trigger
+
+echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events
+echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger
+
+echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger
+
+ping localhost -c 3
+if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then
+    fail "Failed to create combined histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
new file mode 100644 (file)
index 0000000..e84e7d0
--- /dev/null
@@ -0,0 +1,50 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmatch action
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event"
+echo "Test histogram variables,simple expression support and onmatch action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
+echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
+ping localhost -c 5
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
+    fail "Failed to create onmatch action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
new file mode 100644 (file)
index 0000000..7907d8a
--- /dev/null
@@ -0,0 +1,50 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmatch-onmax action
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event"
+echo "Test histogram variables,simple expression support and onmatch-onmax action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm):onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
+ping localhost -c 5
+if [ ! grep -q "ping" events/synthetic/wakeup_latency/hist -o ! grep -q "max:" events/sched/sched_switch/hist]; then
+    fail "Failed to create onmatch-onmax action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
new file mode 100644 (file)
index 0000000..38b7ed6
--- /dev/null
@@ -0,0 +1,48 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmax action
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test onmax action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_waking/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+ping localhost -c 3
+if ! grep -q "max:" events/sched/sched_switch/hist; then
+    fail "Failed to create onmax action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
new file mode 100644 (file)
index 0000000..cef1137
--- /dev/null
@@ -0,0 +1,54 @@
+#!/bin/sh
+# description: event trigger - test synthetic event create remove
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to create wakeup_latency synthetic event"
+fi
+
+reset_trigger
+
+echo "Test create synthetic event with an error"
+echo 'wakeup_latency  u64 lat pid_t pid char' > synthetic_events > /dev/null
+if [ -d events/synthetic/wakeup_latency ]; then
+    fail "Created wakeup_latency synthetic event with an invalid format"
+fi
+
+reset_trigger
+
+echo "Test remove synthetic event"
+echo '!wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to delete wakeup_latency synthetic event"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
new file mode 100644 (file)
index 0000000..6c16f77
--- /dev/null
@@ -0,0 +1,8 @@
+/proc-loadavg-001
+/proc-self-map-files-001
+/proc-self-map-files-002
+/proc-self-syscall
+/proc-self-wchan
+/proc-uptime-001
+/proc-uptime-002
+/read
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
new file mode 100644 (file)
index 0000000..dbb87e5
--- /dev/null
@@ -0,0 +1,13 @@
+CFLAGS += -Wall -O2
+
+TEST_GEN_PROGS :=
+TEST_GEN_PROGS += proc-loadavg-001
+TEST_GEN_PROGS += proc-self-map-files-001
+TEST_GEN_PROGS += proc-self-map-files-002
+TEST_GEN_PROGS += proc-self-syscall
+TEST_GEN_PROGS += proc-self-wchan
+TEST_GEN_PROGS += proc-uptime-001
+TEST_GEN_PROGS += proc-uptime-002
+TEST_GEN_PROGS += read
+
+include ../lib.mk
diff --git a/tools/testing/selftests/proc/config b/tools/testing/selftests/proc/config
new file mode 100644 (file)
index 0000000..68fbd2b
--- /dev/null
@@ -0,0 +1 @@
+CONFIG_PROC_FS=y
diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c
new file mode 100644 (file)
index 0000000..e38ad6d
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test that /proc/loadavg correctly reports last pid in pid namespace. */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+int main(void)
+{
+       pid_t pid;
+       int wstatus;
+
+       if (unshare(CLONE_NEWPID) == -1) {
+               if (errno == ENOSYS || errno == EPERM)
+                       return 2;
+               return 1;
+       }
+
+       pid = fork();
+       if (pid == -1)
+               return 1;
+       if (pid == 0) {
+               char buf[128], *p;
+               int fd;
+               ssize_t rv;
+
+               fd = open("/proc/loadavg" , O_RDONLY);
+               if (fd == -1)
+                       return 1;
+               rv = read(fd, buf, sizeof(buf));
+               if (rv < 3)
+                       return 1;
+               p = buf + rv;
+
+               /* pid 1 */
+               if (!(p[-3] == ' ' && p[-2] == '1' && p[-1] == '\n'))
+                       return 1;
+
+               pid = fork();
+               if (pid == -1)
+                       return 1;
+               if (pid == 0)
+                       return 0;
+               if (waitpid(pid, NULL, 0) == -1)
+                       return 1;
+
+               lseek(fd, 0, SEEK_SET);
+               rv = read(fd, buf, sizeof(buf));
+               if (rv < 3)
+                       return 1;
+               p = buf + rv;
+
+               /* pid 2 */
+               if (!(p[-3] == ' ' && p[-2] == '2' && p[-1] == '\n'))
+                       return 1;
+
+               return 0;
+       }
+
+       if (waitpid(pid, &wstatus, 0) == -1)
+               return 1;
+       if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0)
+               return 0;
+       return 1;
+}
diff --git a/tools/testing/selftests/proc/proc-self-map-files-001.c b/tools/testing/selftests/proc/proc-self-map-files-001.c
new file mode 100644 (file)
index 0000000..af1d0a6
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test readlink /proc/self/map_files/... */
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+
+static void pass(const char *fmt, unsigned long a, unsigned long b)
+{
+       char name[64];
+       char buf[64];
+
+       snprintf(name, sizeof(name), fmt, a, b);
+       if (readlink(name, buf, sizeof(buf)) == -1)
+               exit(1);
+}
+
+static void fail(const char *fmt, unsigned long a, unsigned long b)
+{
+       char name[64];
+       char buf[64];
+
+       snprintf(name, sizeof(name), fmt, a, b);
+       if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT)
+               return;
+       exit(1);
+}
+
+int main(void)
+{
+       const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+       void *p;
+       int fd;
+       unsigned long a, b;
+
+       fd = open("/dev/zero", O_RDONLY);
+       if (fd == -1)
+               return 1;
+
+       p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE, fd, 0);
+       if (p == MAP_FAILED)
+               return 1;
+
+       a = (unsigned long)p;
+       b = (unsigned long)p + PAGE_SIZE;
+
+       pass("/proc/self/map_files/%lx-%lx", a, b);
+       fail("/proc/self/map_files/ %lx-%lx", a, b);
+       fail("/proc/self/map_files/%lx -%lx", a, b);
+       fail("/proc/self/map_files/%lx- %lx", a, b);
+       fail("/proc/self/map_files/%lx-%lx ", a, b);
+       fail("/proc/self/map_files/0%lx-%lx", a, b);
+       fail("/proc/self/map_files/%lx-0%lx", a, b);
+       if (sizeof(long) == 4) {
+               fail("/proc/self/map_files/100000000%lx-%lx", a, b);
+               fail("/proc/self/map_files/%lx-100000000%lx", a, b);
+       } else if (sizeof(long) == 8) {
+               fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b);
+               fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b);
+       } else
+               return 1;
+
+       return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c
new file mode 100644 (file)
index 0000000..aebf4be
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test readlink /proc/self/map_files/... with address 0. */
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+
/* Format a map_files name and insist the resulting symlink is readable. */
static void pass(const char *fmt, unsigned long a, unsigned long b)
{
	char link[64], dst[64];
	ssize_t rv;

	snprintf(link, sizeof(link), fmt, a, b);
	rv = readlink(link, dst, sizeof(dst));
	if (rv == -1)
		exit(1);
}
+
/* Format a map_files name and insist the lookup fails with ENOENT. */
static void fail(const char *fmt, unsigned long a, unsigned long b)
{
	char link[64], dst[64];

	snprintf(link, sizeof(link), fmt, a, b);
	/* De Morgan of the success condition: exit unless (-1, ENOENT). */
	if (readlink(link, dst, sizeof(dst)) != -1 || errno != ENOENT)
		exit(1);
}
+
+int main(void)
+{
+       const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+       void *p;
+       int fd;
+       unsigned long a, b;
+
+       fd = open("/dev/zero", O_RDONLY);
+       if (fd == -1)
+               return 1;
+
+       p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0);
+       if (p == MAP_FAILED) {
+               if (errno == EPERM)
+                       return 2;
+               return 1;
+       }
+
+       a = (unsigned long)p;
+       b = (unsigned long)p + PAGE_SIZE;
+
+       pass("/proc/self/map_files/%lx-%lx", a, b);
+       fail("/proc/self/map_files/ %lx-%lx", a, b);
+       fail("/proc/self/map_files/%lx -%lx", a, b);
+       fail("/proc/self/map_files/%lx- %lx", a, b);
+       fail("/proc/self/map_files/%lx-%lx ", a, b);
+       fail("/proc/self/map_files/0%lx-%lx", a, b);
+       fail("/proc/self/map_files/%lx-0%lx", a, b);
+       if (sizeof(long) == 4) {
+               fail("/proc/self/map_files/100000000%lx-%lx", a, b);
+               fail("/proc/self/map_files/%lx-100000000%lx", a, b);
+       } else if (sizeof(long) == 8) {
+               fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b);
+               fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b);
+       } else
+               return 1;
+
+       return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c
new file mode 100644 (file)
index 0000000..05eb6f9
--- /dev/null
@@ -0,0 +1,45 @@
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdio.h>
+
/* read(2) issued directly via syscall(2), bypassing any libc wrapper. */
static inline ssize_t sys_read(int fd, void *buf, size_t len)
{
	long rv = syscall(SYS_read, fd, buf, len);

	return (ssize_t)rv;
}
+
/*
 * Check /proc/self/syscall: while this task blocks in read(2) on that
 * very file, the file must report SYS_read and the first three
 * arguments (fd, buffer address, length) we passed.
 */
int main(void)
{
	char expected[64];
	char buf[64];
	ssize_t rv;
	int fd;

	fd = open("/proc/self/syscall", O_RDONLY);
	if (fd == -1)
		return errno == ENOENT ? 2 : 1;	/* 2 = skip: not compiled in */

	/* Do direct system call as libc can wrap anything. */
	snprintf(expected, sizeof(expected), "%ld 0x%lx 0x%lx 0x%lx",
		 (long)SYS_read, (long)fd, (long)buf, (long)sizeof(buf));

	memset(buf, 0, sizeof(buf));
	rv = sys_read(fd, buf, sizeof(buf));
	if (rv < 0 || (size_t)rv < strlen(expected))
		return 1;

	return strncmp(expected, buf, strlen(expected)) == 0 ? 0 : 1;
}
diff --git a/tools/testing/selftests/proc/proc-self-wchan.c b/tools/testing/selftests/proc/proc-self-wchan.c
new file mode 100644 (file)
index 0000000..b8d8728
--- /dev/null
@@ -0,0 +1,25 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+
/*
 * A running task is not sleeping in the kernel, so /proc/self/wchan
 * for the current thread must read as exactly the single byte '0'.
 * Returns 2 ("skip") if wchan is not available on this kernel.
 */
int main(void)
{
	char buf[64];
	int fd;

	fd = open("/proc/self/wchan", O_RDONLY);
	if (fd == -1)
		return errno == ENOENT ? 2 : 1;

	buf[0] = '\0';
	if (read(fd, buf, sizeof(buf)) != 1 || buf[0] != '0')
		return 1;

	return 0;
}
diff --git a/tools/testing/selftests/proc/proc-uptime-001.c b/tools/testing/selftests/proc/proc-uptime-001.c
new file mode 100644 (file)
index 0000000..303f260
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that values in /proc/uptime increment monotonically.
+#undef NDEBUG
+#include <assert.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc-uptime.h"
+
/*
 * Poll /proc/uptime in a tight loop for 100 centiseconds (~1 s of
 * uptime) and assert both fields never go backwards.  proc_uptime()
 * returns values scaled to centiseconds.
 */
int main(void)
{
	uint64_t start, up_prev, up, idle_prev, idle;
	int fd;

	fd = open("/proc/uptime", O_RDONLY);
	assert(fd >= 0);

	proc_uptime(fd, &up_prev, &idle_prev);
	start = up_prev;
	do {
		proc_uptime(fd, &up, &idle);
		assert(up >= up_prev);
		assert(idle >= idle_prev);
		up_prev = up;
		idle_prev = idle;
	} while (up - start < 100);

	return 0;
}
diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c
new file mode 100644 (file)
index 0000000..0cb79e1
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that values in /proc/uptime increment monotonically
+// while shifting across CPUs.
+#define _GNU_SOURCE
+#undef NDEBUG
+#include <assert.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc-uptime.h"
+
/* Raw sched_getaffinity(2); on success the kernel returns the size in
 * bytes of its internal cpumask, on failure -1 with errno set. */
static inline int sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long *m)
{
	long rv = syscall(SYS_sched_getaffinity, pid, len, m);

	return (int)rv;
}
+
/* Raw sched_setaffinity(2); 0 on success, -1 with errno on failure. */
static inline int sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned long *m)
{
	long rv = syscall(SYS_sched_setaffinity, pid, len, m);

	return (int)rv;
}
+
/*
 * Pin the task to each possible CPU in turn and assert that both
 * /proc/uptime fields stay monotonic across the migrations.
 */
int main(void)
{
	unsigned int len;
	unsigned long *m;
	unsigned int cpu;
	uint64_t u0, u1, i0, i1;
	int fd;

	/*
	 * Find the kernel's cpumask size ("nr_cpu_ids" rounded up to a
	 * word): the raw syscall fails with EINVAL while the buffer is
	 * too small.  BUG FIX: syscall(2) via libc returns -1 and sets
	 * errno — it never returns -EINVAL directly, so the original
	 * "== -EINVAL" comparison could not match and the probe always
	 * stopped after a single word-sized iteration.
	 */
	m = NULL;
	len = 0;
	do {
		len += sizeof(unsigned long);
		free(m);
		m = malloc(len);
		assert(m);	/* was unchecked: NULL would crash memset below */
	} while (sys_sched_getaffinity(0, len, m) == -1 && errno == EINVAL);

	fd = open("/proc/uptime", O_RDONLY);
	assert(fd >= 0);

	proc_uptime(fd, &u0, &i0);
	for (cpu = 0; cpu < len * 8; cpu++) {
		memset(m, 0, len);
		m[cpu / (8 * sizeof(unsigned long))] |= 1UL << (cpu % (8 * sizeof(unsigned long)));

		/* CPU might not exist, ignore error */
		sys_sched_setaffinity(0, len, m);

		proc_uptime(fd, &u1, &i1);
		assert(u1 >= u0);
		assert(i1 >= i0);
		u0 = u1;
		i0 = i1;
	}

	return 0;
}
diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h
new file mode 100644 (file)
index 0000000..d584419
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
/*
 * Parse a non-negative decimal exactly as /proc prints one: a lone
 * "0", or a value starting with 1-9 (no leading zeros, no sign).
 * Any other leading character trips an assertion.
 */
static unsigned long long xstrtoull(const char *p, char **end)
{
	unsigned long long val;

	if (*p == '0') {
		*end = (char *)p + 1;
		return 0;
	}
	assert('1' <= *p && *p <= '9');
	errno = 0;
	val = strtoull(p, end, 10);
	assert(errno == 0);
	return val;
}

/*
 * Read /proc/uptime via pread(..., 0) and return both fields scaled
 * to centiseconds.  The content must match "%llu.%02u %llu.%02u\n"
 * exactly and fill the file completely; anything else asserts.
 * NOTE: uses uint64_t, so the includer must provide <stdint.h>.
 */
static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle)
{
	uint64_t sec, centisec;
	char buf[64];
	char *p = buf;
	ssize_t rv;

	/* Zero-fill so the parser never walks past the data ("p < end" checks). */
	memset(buf, 0, sizeof(buf));
	rv = pread(fd, buf, sizeof(buf), 0);
	assert(0 <= rv && rv <= (ssize_t)sizeof(buf));
	buf[sizeof(buf) - 1] = '\0';

	sec = xstrtoull(p, &p);
	assert(p[0] == '.');
	assert('0' <= p[1] && p[1] <= '9');
	assert('0' <= p[2] && p[2] <= '9');
	assert(p[3] == ' ');
	centisec = (p[1] - '0') * 10 + (p[2] - '0');
	*uptime = sec * 100 + centisec;

	p += 4;

	sec = xstrtoull(p, &p);
	assert(p[0] == '.');
	assert('0' <= p[1] && p[1] <= '9');
	assert('0' <= p[2] && p[2] <= '9');
	assert(p[3] == '\n');
	centisec = (p[1] - '0') * 10 + (p[2] - '0');
	*idle = sec * 100 + centisec;

	/* The newline must be the last byte actually read. */
	assert(p + 4 == buf + rv);
}
diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c
new file mode 100644 (file)
index 0000000..12e397f
--- /dev/null
@@ -0,0 +1,147 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test
+// 1) read of every file in /proc
+// 2) readlink of every symlink in /proc
+// 3) recursively (1) + (2) for every directory in /proc
+// 4) write to /proc/*/clear_refs and /proc/*/task/*/clear_refs
+// 5) write to /proc/sysrq-trigger
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
/* True iff the two NUL-terminated strings are byte-for-byte equal. */
static inline bool streq(const char *s1, const char *s2)
{
	return !strcmp(s1, s2);
}
+
/*
 * readdir() that distinguishes end-of-directory from a real error:
 * returns NULL only at end of stream, exits the test on failure.
 */
static struct dirent *xreaddir(DIR *d)
{
	struct dirent *de;

	errno = 0;
	de = readdir(d);
	if (de == NULL && errno != 0)
		exit(1);
	return de;
}
+
/*
 * Best-effort read of one regular file relative to d.  Unopenable
 * files are silently skipped; a failed read (rv == -1) is accepted.
 * O_NONBLOCK because e.g. a plain read of /proc/kmsg can block.
 */
static void f_reg(DIR *d, const char *filename)
{
	char buf[4096];
	ssize_t rv;
	int fd;

	fd = openat(dirfd(d), filename, O_RDONLY|O_NONBLOCK);
	if (fd < 0)
		return;
	rv = read(fd, buf, sizeof(buf));
	assert(rv == -1 || (0 <= rv && rv <= (ssize_t)sizeof(buf)));
	close(fd);
}
+
/*
 * Best-effort write of buf[0..len) to one file relative to d.
 * Unopenable files are skipped; a failed write (rv == -1) is accepted.
 */
static void f_reg_write(DIR *d, const char *filename, const char *buf, size_t len)
{
	ssize_t rv;
	int fd;

	fd = openat(dirfd(d), filename, O_WRONLY);
	if (fd < 0)
		return;
	rv = write(fd, buf, len);
	assert(rv == -1 || (0 <= rv && rv <= (ssize_t)len));
	close(fd);
}
+
/* Best-effort readlink of one symlink relative to d; -1 is accepted. */
static void f_lnk(DIR *d, const char *filename)
{
	char target[4096];
	ssize_t rv;

	rv = readlinkat(dirfd(d), filename, target, sizeof(target));
	assert(rv == -1 || (0 <= rv && rv <= (ssize_t)sizeof(target)));
}
+
+static void f(DIR *d, unsigned int level)
+{
+       struct dirent *de;
+
+       de = xreaddir(d);
+       assert(de->d_type == DT_DIR);
+       assert(streq(de->d_name, "."));
+
+       de = xreaddir(d);
+       assert(de->d_type == DT_DIR);
+       assert(streq(de->d_name, ".."));
+
+       while ((de = xreaddir(d))) {
+               assert(!streq(de->d_name, "."));
+               assert(!streq(de->d_name, ".."));
+
+               switch (de->d_type) {
+                       DIR *dd;
+                       int fd;
+
+               case DT_REG:
+                       if (level == 0 && streq(de->d_name, "sysrq-trigger")) {
+                               f_reg_write(d, de->d_name, "h", 1);
+                       } else if (level == 1 && streq(de->d_name, "clear_refs")) {
+                               f_reg_write(d, de->d_name, "1", 1);
+                       } else if (level == 3 && streq(de->d_name, "clear_refs")) {
+                               f_reg_write(d, de->d_name, "1", 1);
+                       } else {
+                               f_reg(d, de->d_name);
+                       }
+                       break;
+               case DT_DIR:
+                       fd = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY);
+                       if (fd == -1)
+                               continue;
+                       dd = fdopendir(fd);
+                       if (!dd)
+                               continue;
+                       f(dd, level + 1);
+                       closedir(dd);
+                       break;
+               case DT_LNK:
+                       f_lnk(d, de->d_name);
+                       break;
+               default:
+                       assert(0);
+               }
+       }
+}
+
/*
 * Walk all of /proc.  Returns 2 ("skip") when procfs is not mounted,
 * 0 on a complete walk; f() asserts on any inconsistency.
 */
int main(void)
{
	DIR *d;

	d = opendir("/proc");
	if (!d)
		return 2;
	f(d, 0);
	closedir(d);	/* FIX: the directory stream was never closed */
	return 0;
}