Merge branch 'locking-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
authorLinus Torvalds <torvalds@linux-foundation.org>
Sun, 11 Nov 2018 22:18:10 +0000 (16:18 -0600)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sun, 11 Nov 2018 22:18:10 +0000 (16:18 -0600)
Pull locking build fix from Thomas Gleixner:
 "A single fix for a build fail with CONFIG_PROFILE_ALL_BRANCHES=y in
  the qspinlock code"

* 'locking-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/qspinlock: Fix compile error

892 files changed:
.mailmap
Documentation/ABI/testing/sysfs-class-led-trigger-pattern
Documentation/ABI/testing/sysfs-platform-lg-laptop [new file with mode: 0644]
Documentation/admin-guide/cgroup-v2.rst
Documentation/admin-guide/kernel-parameters.txt
Documentation/crypto/asymmetric-keys.txt
Documentation/devicetree/bindings/arm/cpu-capacity.txt
Documentation/devicetree/bindings/arm/shmobile.txt
Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.txt [new file with mode: 0644]
Documentation/devicetree/bindings/display/panel/innolux,tv123wam.txt [deleted file]
Documentation/devicetree/bindings/display/panel/simple-panel.txt
Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt
Documentation/devicetree/bindings/i2c/i2c-omap.txt
Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
Documentation/devicetree/bindings/timer/csky,gx6605s-timer.txt [new file with mode: 0644]
Documentation/devicetree/bindings/timer/csky,mptimer.txt [new file with mode: 0644]
Documentation/filesystems/overlayfs.txt
Documentation/filesystems/porting
Documentation/filesystems/ubifs-authentication.md [new file with mode: 0644]
Documentation/filesystems/ubifs.txt
Documentation/filesystems/vfs.txt
Documentation/i2c/busses/i2c-nvidia-gpu [new file with mode: 0644]
Documentation/kbuild/makefiles.txt
Documentation/laptops/lg-laptop.rst [new file with mode: 0644]
Documentation/networking/ice.rst
Documentation/networking/ip-sysctl.txt
Documentation/process/index.rst
Documentation/process/programming-language.rst [new file with mode: 0644]
Documentation/security/keys/core.rst
Documentation/security/self-protection.rst
Documentation/sysctl/kernel.txt
Documentation/x86/x86_64/mm.txt
MAINTAINERS
Makefile
arch/Kconfig
arch/alpha/include/asm/termios.h
arch/alpha/include/uapi/asm/ioctls.h
arch/alpha/include/uapi/asm/termbits.h
arch/arm/boot/dts/imx53-ppd.dts
arch/arm/boot/dts/imx6sll.dtsi
arch/arm/boot/dts/imx6sx-sdb.dtsi
arch/arm/boot/dts/stm32mp157c.dtsi
arch/arm/boot/dts/vf610m4-colibri.dts
arch/arm/include/asm/pgtable-2level.h
arch/arm/mach-omap1/board-ams-delta.c
arch/arm/mm/proc-v7.S
arch/arm/plat-orion/mpp.c
arch/arm64/Makefile
arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
arch/arm64/boot/dts/renesas/r8a7795.dtsi
arch/arm64/boot/dts/renesas/r8a77980-condor.dts
arch/arm64/configs/defconfig
arch/arm64/include/asm/percpu.h
arch/arm64/include/asm/processor.h
arch/arm64/kernel/crash_dump.c
arch/arm64/kernel/probes/kprobes.c
arch/arm64/kernel/process.c
arch/arm64/mm/dma-mapping.c
arch/arm64/mm/init.c
arch/arm64/mm/mmu.c
arch/csky/Kconfig.debug
arch/csky/Makefile
arch/csky/boot/dts/Makefile
arch/m68k/include/asm/pgtable_mm.h
arch/microblaze/include/asm/pgtable.h
arch/mips/Makefile
arch/mips/cavium-octeon/executive/cvmx-helper.c
arch/mips/mm/dma-noncoherent.c
arch/mips/vdso/Makefile
arch/nds32/include/asm/pgtable.h
arch/parisc/include/asm/pgtable.h
arch/powerpc/Kconfig
arch/powerpc/Makefile
arch/powerpc/boot/dts/fsl/t2080rdb.dts
arch/powerpc/boot/dts/mpc885ads.dts
arch/powerpc/include/asm/code-patching.h
arch/powerpc/include/asm/mmu-8xx.h
arch/powerpc/include/asm/rtas.h
arch/powerpc/kernel/head_8xx.S
arch/powerpc/kernel/process.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/emulate.c
arch/powerpc/mm/8xx_mmu.c
arch/powerpc/perf/8xx-pmu.c
arch/powerpc/platforms/40x/Kconfig
arch/powerpc/platforms/44x/Kconfig
arch/powerpc/platforms/pseries/lparcfg.c
arch/powerpc/xmon/Makefile
arch/riscv/configs/defconfig
arch/s390/Makefile
arch/s390/boot/compressed/Makefile
arch/s390/configs/debug_defconfig
arch/s390/configs/performance_defconfig
arch/s390/defconfig
arch/s390/include/asm/mmu_context.h
arch/s390/include/asm/pgalloc.h
arch/s390/include/asm/pgtable.h
arch/s390/include/asm/processor.h
arch/s390/include/asm/thread_info.h
arch/s390/include/asm/tlb.h
arch/s390/kernel/entry.S
arch/s390/kernel/perf_cpum_cf.c
arch/s390/kernel/perf_cpum_sf.c
arch/s390/kernel/vdso32/Makefile
arch/s390/kernel/vdso64/Makefile
arch/s390/kernel/vmlinux.lds.S
arch/s390/mm/pgalloc.c
arch/s390/numa/numa.c
arch/sparc/kernel/perf_event.c
arch/sparc/kernel/systbls_64.S
arch/um/drivers/ubd_kern.c
arch/x86/Kconfig
arch/x86/boot/cpucheck.c
arch/x86/boot/early_serial_console.c
arch/x86/boot/edd.c
arch/x86/boot/main.c
arch/x86/boot/memory.c
arch/x86/boot/regs.c
arch/x86/boot/video-vesa.c
arch/x86/boot/video.c
arch/x86/entry/calling.h
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/entry/entry_64_compat.S
arch/x86/events/intel/core.c
arch/x86/include/asm/compat.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/ftrace.h
arch/x86/include/asm/paravirt_types.h
arch/x86/include/asm/tlbflush.h
arch/x86/include/asm/xen/page.h
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/kernel/cpu/microcode/core.c
arch/x86/kernel/cpu/mtrr/generic.c
arch/x86/kernel/cpu/mtrr/if.c
arch/x86/kernel/early_printk.c
arch/x86/kernel/head64.c
arch/x86/kernel/msr.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/paravirt_patch_32.c
arch/x86/kernel/paravirt_patch_64.c
arch/x86/kernel/process_64.c
arch/x86/kernel/sys_x86_64.c
arch/x86/kernel/traps.c
arch/x86/kvm/emulate.c
arch/x86/kvm/lapic.c
arch/x86/kvm/x86.c
arch/x86/mm/hugetlbpage.c
arch/x86/mm/mmap.c
arch/x86/mm/numa_emulation.c
arch/x86/mm/pageattr.c
arch/x86/tools/relocs.c
arch/x86/um/asm/elf.h
arch/x86/xen/p2m.c
arch/x86/xen/spinlock.c
arch/xtensa/Kconfig
arch/xtensa/boot/Makefile
arch/xtensa/kernel/vmlinux.lds.S
arch/xtensa/mm/init.c
block/bfq-cgroup.c
block/bfq-iosched.c
block/bio.c
block/blk-cgroup.c
block/blk-core.c
block/blk-iolatency.c
block/blk-lib.c
block/blk-merge.c
block/blk-sysfs.c
block/blk-throttle.c
block/blk.h
block/bounce.c
block/cfq-iosched.c
crypto/asymmetric_keys/Kconfig
crypto/asymmetric_keys/Makefile
crypto/asymmetric_keys/asym_tpm.c [new file with mode: 0644]
crypto/asymmetric_keys/asymmetric_keys.h
crypto/asymmetric_keys/asymmetric_type.c
crypto/asymmetric_keys/pkcs7_parser.c
crypto/asymmetric_keys/pkcs8.asn1 [new file with mode: 0644]
crypto/asymmetric_keys/pkcs8_parser.c [new file with mode: 0644]
crypto/asymmetric_keys/public_key.c
crypto/asymmetric_keys/signature.c
crypto/asymmetric_keys/tpm.asn1 [new file with mode: 0644]
crypto/asymmetric_keys/tpm_parser.c [new file with mode: 0644]
crypto/asymmetric_keys/x509_cert_parser.c
crypto/rsa-pkcs1pad.c
drivers/acpi/device_pm.c
drivers/ata/sata_rcar.c
drivers/auxdisplay/panel.c
drivers/block/brd.c
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_receiver.c
drivers/block/loop.c
drivers/block/mtip32xx/mtip32xx.c
drivers/block/nbd.c
drivers/block/xen-blkfront.c
drivers/clk/clk-fixed-factor.c
drivers/clk/meson/axg.c
drivers/clk/meson/gxbb.c
drivers/clk/qcom/gcc-qcs404.c
drivers/clocksource/Kconfig
drivers/clocksource/Makefile
drivers/clocksource/timer-gx6605s.c [new file with mode: 0644]
drivers/clocksource/timer-mp-csky.c [new file with mode: 0644]
drivers/edac/Kconfig
drivers/edac/skx_edac.c
drivers/firmware/Kconfig
drivers/firmware/Makefile
drivers/firmware/dcdbas.c [deleted file]
drivers/firmware/dcdbas.h [deleted file]
drivers/firmware/dell_rbu.c [deleted file]
drivers/firmware/efi/efivars.c
drivers/fsi/fsi-sbefifo.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
drivers/gpu/drm/amd/display/dc/core/dc_link.c
drivers/gpu/drm/amd/display/dc/dc.h
drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
drivers/gpu/drm/amd/display/dc/os_types.h
drivers/gpu/drm/amd/include/amd_shared.h
drivers/gpu/drm/amd/include/atomfirmware.h
drivers/gpu/drm/amd/powerplay/amd_powerplay.c
drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h
drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c
drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h
drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h
drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c
drivers/gpu/drm/bridge/ti-sn65dsi86.c
drivers/gpu/drm/drm_atomic_helper.c
drivers/gpu/drm/drm_connector.c
drivers/gpu/drm/drm_edid.c
drivers/gpu/drm/etnaviv/etnaviv_sched.c
drivers/gpu/drm/exynos/exynos5433_drm_decon.c
drivers/gpu/drm/exynos/exynos_drm_crtc.c
drivers/gpu/drm/exynos/exynos_drm_drv.h
drivers/gpu/drm/exynos/exynos_drm_dsi.c
drivers/gpu/drm/exynos/exynos_drm_fbdev.c
drivers/gpu/drm/i915/gvt/gtt.c
drivers/gpu/drm/i915/gvt/gtt.h
drivers/gpu/drm/i915/gvt/handlers.c
drivers/gpu/drm/i915/gvt/mmio_context.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/i915_gem_gtt.c
drivers/gpu/drm/i915/i915_gem_gtt.h
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_audio.c
drivers/gpu/drm/i915/intel_cdclk.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_dp.c
drivers/gpu/drm/i915/intel_dp_mst.c
drivers/gpu/drm/i915/intel_lpe_audio.c
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/selftests/huge_pages.c
drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
drivers/gpu/drm/nouveau/dispnv50/disp.c
drivers/gpu/drm/panel/panel-simple.c
drivers/gpu/drm/sun4i/sun4i_lvds.c
drivers/gpu/drm/sun4i/sun4i_rgb.c
drivers/gpu/drm/sun4i/sun4i_tcon.c
drivers/gpu/vga/vga_switcheroo.c
drivers/hid/Kconfig
drivers/hid/hid-alps.c
drivers/hid/hid-asus.c
drivers/hid/hid-ids.h
drivers/hid/hid-quirks.c
drivers/hid/i2c-hid/i2c-hid-core.c
drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c
drivers/hid/usbhid/hiddev.c
drivers/hwmon/hwmon.c
drivers/hwmon/ibmpowernv.c
drivers/i2c/busses/Kconfig
drivers/i2c/busses/Makefile
drivers/i2c/busses/i2c-nvidia-gpu.c [new file with mode: 0644]
drivers/i2c/busses/i2c-qcom-geni.c
drivers/i2c/i2c-core-base.c
drivers/irqchip/irq-mvebu-sei.c
drivers/isdn/mISDN/l1oip_core.c
drivers/leds/trigger/ledtrig-pattern.c
drivers/md/raid0.c
drivers/misc/lkdtm/Makefile
drivers/misc/lkdtm/core.c
drivers/misc/lkdtm/lkdtm.h
drivers/misc/lkdtm/stackleak.c [new file with mode: 0644]
drivers/misc/vmw_vmci/vmci_queue_pair.c
drivers/mtd/devices/Kconfig
drivers/mtd/maps/sa1100-flash.c
drivers/mtd/nand/raw/nand_base.c
drivers/mtd/spi-nor/cadence-quadspi.c
drivers/mtd/spi-nor/spi-nor.c
drivers/mtd/ubi/attach.c
drivers/mtd/ubi/build.c
drivers/net/bonding/bond_main.c
drivers/net/bonding/bond_netlink.c
drivers/net/dsa/microchip/ksz_common.c
drivers/net/ethernet/atheros/alx/alx.h
drivers/net/ethernet/atheros/alx/main.c
drivers/net/ethernet/broadcom/bcmsysport.c
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/hisilicon/hns3/hnae3.h
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
drivers/net/ethernet/intel/Kconfig
drivers/net/ethernet/intel/fm10k/fm10k_iov.c
drivers/net/ethernet/intel/fm10k/fm10k_main.c
drivers/net/ethernet/intel/fm10k/fm10k_pci.c
drivers/net/ethernet/intel/fm10k/fm10k_type.h
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/intel/igb/igb_ptp.c
drivers/net/ethernet/intel/ixgbe/Makefile
drivers/net/ethernet/intel/ixgbe/ixgbe.h
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
drivers/net/ethernet/intel/ixgbevf/Makefile
drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
drivers/net/ethernet/marvell/mvpp2/mvpp2.h
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
drivers/net/ethernet/mellanox/mlx4/en_rx.c
drivers/net/ethernet/mellanox/mlx4/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlxsw/core.c
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/qlogic/qed/qed_mcp.c
drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
drivers/net/ntb_netdev.c
drivers/net/phy/realtek.c
drivers/net/usb/smsc95xx.c
drivers/ntb/hw/idt/Kconfig
drivers/ntb/hw/idt/ntb_hw_idt.c
drivers/ntb/hw/idt/ntb_hw_idt.h
drivers/ntb/hw/intel/ntb_hw_gen1.c
drivers/ntb/ntb_transport.c
drivers/nvme/host/core.c
drivers/nvme/host/fc.c
drivers/nvme/host/multipath.c
drivers/nvme/host/pci.c
drivers/nvme/target/core.c
drivers/nvme/target/io-cmd-file.c
drivers/nvme/target/rdma.c
drivers/of/base.c
drivers/of/device.c
drivers/of/of_numa.c
drivers/platform/x86/Kconfig
drivers/platform/x86/Makefile
drivers/platform/x86/acerhdf.c
drivers/platform/x86/asus-wmi.c
drivers/platform/x86/dcdbas.c [new file with mode: 0644]
drivers/platform/x86/dcdbas.h [new file with mode: 0644]
drivers/platform/x86/dell-smbios-smm.c
drivers/platform/x86/dell_rbu.c [new file with mode: 0644]
drivers/platform/x86/ideapad-laptop.c
drivers/platform/x86/intel-hid.c
drivers/platform/x86/intel-rst.c
drivers/platform/x86/intel-smartconnect.c
drivers/platform/x86/intel-wmi-thunderbolt.c
drivers/platform/x86/intel_atomisp2_pm.c [new file with mode: 0644]
drivers/platform/x86/intel_bxtwc_tmu.c
drivers/platform/x86/intel_cht_int33fe.c
drivers/platform/x86/intel_chtdc_ti_pwrbtn.c
drivers/platform/x86/intel_int0002_vgpio.c
drivers/platform/x86/intel_ips.c
drivers/platform/x86/intel_ips.h
drivers/platform/x86/intel_menlow.c
drivers/platform/x86/intel_mid_powerbtn.c
drivers/platform/x86/intel_mid_thermal.c
drivers/platform/x86/intel_oaktrail.c
drivers/platform/x86/intel_pmc_core.c
drivers/platform/x86/intel_pmc_core.h
drivers/platform/x86/intel_pmc_ipc.c
drivers/platform/x86/intel_punit_ipc.c
drivers/platform/x86/intel_scu_ipc.c
drivers/platform/x86/intel_scu_ipcutil.c
drivers/platform/x86/intel_telemetry_core.c
drivers/platform/x86/intel_telemetry_debugfs.c
drivers/platform/x86/intel_telemetry_pltdrv.c
drivers/platform/x86/intel_turbo_max_3.c
drivers/platform/x86/lg-laptop.c [new file with mode: 0644]
drivers/platform/x86/mlx-platform.c
drivers/platform/x86/touchscreen_dmi.c
drivers/platform/x86/wmi.c
drivers/pwm/Kconfig
drivers/pwm/pwm-lpss-platform.c
drivers/pwm/pwm-lpss.c
drivers/pwm/pwm-lpss.h
drivers/pwm/pwm-rcar.c
drivers/pwm/pwm-renesas-tpu.c
drivers/pwm/pwm-tegra.c
drivers/pwm/sysfs.c
drivers/s390/net/qeth_core.h
drivers/s390/net/qeth_core_main.c
drivers/s390/net/qeth_core_mpc.h
drivers/s390/net/qeth_l2_main.c
drivers/s390/net/qeth_l3_main.c
drivers/scsi/3w-9xxx.c
drivers/scsi/3w-sas.c
drivers/scsi/Kconfig
drivers/scsi/aha152x.c
drivers/scsi/mvsas/mv_sas.c
drivers/scsi/pcmcia/aha152x_core.c
drivers/scsi/qla2xxx/qla_attr.c
drivers/scsi/qla2xxx/qla_init.c
drivers/scsi/qla2xxx/qla_iocb.c
drivers/scsi/qla2xxx/qla_isr.c
drivers/scsi/qla2xxx/qla_mbx.c
drivers/scsi/qla2xxx/qla_mr.c
drivers/scsi/qla2xxx/qla_nx.c
drivers/scsi/qla2xxx/qla_nx2.c
drivers/scsi/qla2xxx/qla_os.c
drivers/scsi/qla2xxx/qla_sup.c
drivers/scsi/qla2xxx/qla_target.c
drivers/soc/ti/knav_qmss.h
drivers/soc/ti/knav_qmss_acc.c
drivers/soc/ti/knav_qmss_queue.c
drivers/target/iscsi/iscsi_target_util.c
drivers/target/target_core_alua.c
drivers/target/target_core_file.c
drivers/target/target_core_transport.c
drivers/tty/serial/sh-sci.c
drivers/tty/tty_baudrate.c
drivers/tty/vt/vt.c
drivers/usb/typec/ucsi/Kconfig
drivers/usb/typec/ucsi/Makefile
drivers/usb/typec/ucsi/ucsi_ccg.c [new file with mode: 0644]
drivers/usb/usbip/usbip_common.c
drivers/vhost/scsi.c
drivers/vhost/vhost.c
drivers/virtio/virtio_balloon.c
drivers/xen/grant-table.c
drivers/xen/privcmd-buf.c
drivers/xen/pvcalls-back.c
fs/9p/vfs_addr.c
fs/9p/vfs_dir.c
fs/9p/xattr.c
fs/afs/Kconfig
fs/afs/Makefile
fs/afs/addr_list.c
fs/afs/afs.h
fs/afs/cache.c
fs/afs/callback.c
fs/afs/cell.c
fs/afs/cmservice.c
fs/afs/dir.c
fs/afs/dynroot.c
fs/afs/file.c
fs/afs/flock.c
fs/afs/fs_probe.c [new file with mode: 0644]
fs/afs/fsclient.c
fs/afs/inode.c
fs/afs/internal.h
fs/afs/mntpt.c
fs/afs/proc.c
fs/afs/protocol_yfs.h [new file with mode: 0644]
fs/afs/rotate.c
fs/afs/rxrpc.c
fs/afs/security.c
fs/afs/server.c
fs/afs/server_list.c
fs/afs/super.c
fs/afs/vl_list.c [new file with mode: 0644]
fs/afs/vl_probe.c [new file with mode: 0644]
fs/afs/vl_rotate.c [new file with mode: 0644]
fs/afs/vlclient.c
fs/afs/volume.c
fs/afs/write.c
fs/afs/xattr.c
fs/afs/yfsclient.c [new file with mode: 0644]
fs/bfs/inode.c
fs/block_dev.c
fs/btrfs/ctree.h
fs/btrfs/file.c
fs/btrfs/ioctl.c
fs/buffer.c
fs/ceph/file.c
fs/ceph/mds_client.c
fs/ceph/quota.c
fs/cifs/cifs_debug.c
fs/cifs/cifs_spnego.c
fs/cifs/cifsfs.c
fs/cifs/cifsfs.h
fs/cifs/cifsglob.h
fs/cifs/cifspdu.h
fs/cifs/connect.c
fs/cifs/file.c
fs/cifs/inode.c
fs/cifs/misc.c
fs/cifs/smb2ops.c
fs/cifs/smb2pdu.c
fs/cifs/smb2pdu.h
fs/cifs/smbdirect.c
fs/cifs/trace.h
fs/cifs/transport.c
fs/direct-io.c
fs/dlm/lowcomms.c
fs/exofs/super.c
fs/ext4/ext4.h
fs/ext4/ialloc.c
fs/ext4/namei.c
fs/ext4/page-io.c
fs/fuse/file.c
fs/ioctl.c
fs/iomap.c
fs/namespace.c
fs/nfs/nfs4file.c
fs/nfs/nfs4proc.c
fs/nfsd/vfs.c
fs/ntfs/namei.c
fs/ocfs2/buffer_head_io.c
fs/ocfs2/cluster/tcp.c
fs/ocfs2/dir.c
fs/ocfs2/dlmglue.c
fs/ocfs2/file.c
fs/ocfs2/journal.c
fs/ocfs2/move_extents.c
fs/ocfs2/refcounttree.c
fs/ocfs2/refcounttree.h
fs/ocfs2/stackglue.c
fs/ocfs2/stackglue.h
fs/orangefs/inode.c
fs/overlayfs/copy_up.c
fs/overlayfs/dir.c
fs/overlayfs/file.c
fs/overlayfs/inode.c
fs/overlayfs/namei.c
fs/overlayfs/overlayfs.h
fs/overlayfs/super.c
fs/overlayfs/util.c
fs/proc/base.c
fs/read_write.c
fs/splice.c
fs/ubifs/Kconfig
fs/ubifs/Makefile
fs/ubifs/auth.c [new file with mode: 0644]
fs/ubifs/debug.c
fs/ubifs/gc.c
fs/ubifs/io.c
fs/ubifs/journal.c
fs/ubifs/log.c
fs/ubifs/lpt.c
fs/ubifs/lpt_commit.c
fs/ubifs/master.c
fs/ubifs/misc.h
fs/ubifs/recovery.c
fs/ubifs/replay.c
fs/ubifs/sb.c
fs/ubifs/super.c
fs/ubifs/tnc.c
fs/ubifs/tnc_commit.c
fs/ubifs/tnc_misc.c
fs/ubifs/ubifs-media.h
fs/ubifs/ubifs.h
fs/xfs/libxfs/xfs_attr_leaf.c
fs/xfs/xfs_file.c
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_message.c
fs/xfs/xfs_reflink.c
fs/xfs/xfs_reflink.h
include/asm-generic/4level-fixup.h
include/asm-generic/5level-fixup.h
include/asm-generic/pgtable-nop4d-hack.h
include/asm-generic/pgtable-nop4d.h
include/asm-generic/pgtable-nopmd.h
include/asm-generic/pgtable-nopud.h
include/asm-generic/pgtable.h
include/crypto/asym_tpm_subtype.h [new file with mode: 0644]
include/crypto/public_key.h
include/drm/drm_connector.h
include/keys/asymmetric-subtype.h
include/keys/trusted.h [new file with mode: 0644]
include/linux/adxl.h
include/linux/avf/virtchnl.h
include/linux/bio.h
include/linux/blk-cgroup.h
include/linux/blk_types.h
include/linux/bpf_verifier.h
include/linux/ceph/ceph_features.h
include/linux/cgroup.h
include/linux/compat.h
include/linux/compiler-clang.h
include/linux/compiler-gcc.h
include/linux/compiler-intel.h
include/linux/compiler.h
include/linux/compiler_attributes.h [new file with mode: 0644]
include/linux/compiler_types.h
include/linux/cpuhotplug.h
include/linux/fs.h
include/linux/gfp.h
include/linux/hid.h
include/linux/inetdevice.h
include/linux/key-type.h
include/linux/keyctl.h [new file with mode: 0644]
include/linux/mempolicy.h
include/linux/mm.h
include/linux/mtd/nand.h
include/linux/netdevice.h
include/linux/netfilter/ipset/ip_set.h
include/linux/netfilter/ipset/ip_set_comment.h
include/linux/nmi.h
include/linux/notifier.h
include/linux/platform_data/x86/asus-wmi.h [new file with mode: 0644]
include/linux/sched.h
include/linux/stackleak.h [new file with mode: 0644]
include/linux/sunrpc/gss_krb5.h
include/linux/uio.h
include/linux/writeback.h
include/net/addrconf.h
include/net/af_unix.h
include/net/if_inet6.h
include/net/netfilter/nf_conntrack_l4proto.h
include/trace/events/afs.h
include/uapi/linux/keyctl.h
include/uapi/linux/kfd_ioctl.h
include/uapi/linux/netfilter/nf_tables.h
include/uapi/linux/netfilter_bridge.h
include/uapi/linux/perf_event.h
include/uapi/linux/sctp.h
include/uapi/linux/virtio_balloon.h
include/xen/xen-ops.h
kernel/Makefile
kernel/bpf/core.c
kernel/bpf/syscall.c
kernel/bpf/verifier.c
kernel/cgroup/cgroup.c
kernel/configs/kvm_guest.config
kernel/events/core.c
kernel/fork.c
kernel/irq/matrix.c
kernel/kexec_file.c
kernel/resource.c
kernel/sched/rt.c
kernel/sched/topology.c
kernel/stackleak.c [new file with mode: 0644]
kernel/sysctl.c
kernel/time/time.c
kernel/trace/blktrace.c
kernel/trace/trace_printk.c
kernel/trace/trace_probe.c
kernel/user_namespace.c
lib/iov_iter.c
lib/raid6/test/Makefile
mm/filemap.c
mm/huge_memory.c
mm/memcontrol.c
mm/memory_hotplug.c
mm/mempolicy.c
mm/page_io.c
mm/page_poison.c
mm/percpu.c
mm/shmem.c
net/9p/client.c
net/9p/trans_virtio.c
net/bluetooth/6lowpan.c
net/bluetooth/a2mp.c
net/bluetooth/smp.c
net/ceph/messenger.c
net/core/dev.c
net/core/netpoll.c
net/core/rtnetlink.c
net/core/skbuff.c
net/core/sock.c
net/ipv4/igmp.c
net/ipv4/ip_fragment.c
net/ipv4/ip_sockglue.c
net/ipv4/tcp_bpf.c
net/ipv6/af_inet6.c
net/ipv6/anycast.c
net/ipv6/ip6_fib.c
net/ipv6/netfilter/nf_conntrack_reasm.c
net/netfilter/ipset/ip_set_core.c
net/netfilter/ipset/ip_set_hash_netportnet.c
net/netfilter/ipset/ip_set_list_set.c
net/netfilter/ipvs/ip_vs_sync.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_proto_dccp.c
net/netfilter/nf_conntrack_proto_generic.c
net/netfilter/nf_conntrack_proto_icmp.c
net/netfilter/nf_conntrack_proto_icmpv6.c
net/netfilter/nf_conntrack_proto_sctp.c
net/netfilter/nf_conntrack_proto_tcp.c
net/netfilter/nf_conntrack_proto_udp.c
net/netfilter/nfnetlink_cttimeout.c
net/netfilter/nft_compat.c
net/netfilter/nft_numgen.c
net/netfilter/nft_osf.c
net/netfilter/xt_IDLETIMER.c
net/openvswitch/conntrack.c
net/openvswitch/flow_netlink.c
net/rxrpc/ar-internal.h
net/rxrpc/call_event.c
net/rxrpc/output.c
net/sctp/associola.c
net/sctp/outqueue.c
net/sctp/socket.c
net/smc/smc_clc.c
net/socket.c
net/sunrpc/auth_gss/gss_krb5_mech.c
net/sunrpc/auth_gss/gss_krb5_seal.c
net/sunrpc/auth_gss/gss_krb5_wrap.c
net/sunrpc/svcsock.c
net/sunrpc/xprtsock.c
net/tipc/topsrv.c
net/tls/tls_device.c
net/tls/tls_sw.c
net/xfrm/Kconfig
net/xfrm/xfrm_state.c
net/xfrm/xfrm_user.c
scripts/Kbuild.include
scripts/Makefile.extrawarn
scripts/Makefile.gcc-plugins
scripts/gcc-plugins/Kconfig
scripts/gcc-plugins/stackleak_plugin.c [new file with mode: 0644]
scripts/kconfig/Makefile
scripts/kconfig/conf.c
scripts/kconfig/merge_config.sh
security/apparmor/apparmorfs.c
security/apparmor/file.c
security/apparmor/include/cred.h
security/apparmor/include/net.h
security/apparmor/include/policy.h
security/apparmor/include/secid.h
security/apparmor/lib.c
security/apparmor/lsm.c
security/apparmor/net.c
security/apparmor/policy.c
security/apparmor/policy_unpack.c
security/apparmor/secid.c
security/keys/Makefile
security/keys/compat.c
security/keys/internal.h
security/keys/keyctl.c
security/keys/keyctl_pkey.c [new file with mode: 0644]
security/keys/trusted.c
security/keys/trusted.h [deleted file]
sound/firewire/amdtp-stream.c
sound/firewire/dice/dice.c
sound/pci/ca0106/ca0106.h
sound/pci/hda/thinkpad_helper.c
tools/arch/arm64/include/uapi/asm/unistd.h
tools/arch/powerpc/include/uapi/asm/kvm.h
tools/arch/s390/include/uapi/asm/kvm.h
tools/arch/x86/include/uapi/asm/kvm.h
tools/include/uapi/asm-generic/unistd.h
tools/include/uapi/linux/fs.h [new file with mode: 0644]
tools/include/uapi/linux/if_link.h
tools/include/uapi/linux/kvm.h
tools/include/uapi/linux/mman.h
tools/include/uapi/linux/netlink.h
tools/include/uapi/linux/perf_event.h
tools/include/uapi/sound/asound.h
tools/lib/bpf/libbpf.c
tools/lib/subcmd/parse-options.c
tools/lib/subcmd/parse-options.h
tools/objtool/check.c
tools/objtool/check.h
tools/objtool/elf.c
tools/objtool/elf.h
tools/perf/Documentation/build-xed.txt [new file with mode: 0644]
tools/perf/Documentation/intel-pt.txt
tools/perf/Documentation/itrace.txt
tools/perf/Documentation/perf-script.txt
tools/perf/Documentation/perf-top.txt
tools/perf/Documentation/perf-trace.txt
tools/perf/Makefile.perf
tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
tools/perf/arch/sparc/Makefile
tools/perf/arch/sparc/annotate/instructions.c [new file with mode: 0644]
tools/perf/builtin-record.c
tools/perf/builtin-script.c
tools/perf/builtin-stat.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c
tools/perf/check-headers.sh
tools/perf/perf.h
tools/perf/scripts/python/call-graph-from-sql.py [deleted file]
tools/perf/scripts/python/export-to-postgresql.py
tools/perf/scripts/python/export-to-sqlite.py
tools/perf/scripts/python/exported-sql-viewer.py [new file with mode: 0755]
tools/perf/trace/beauty/Build
tools/perf/trace/beauty/beauty.h
tools/perf/trace/beauty/clone.c
tools/perf/trace/beauty/drm_ioctl.sh
tools/perf/trace/beauty/eventfd.c
tools/perf/trace/beauty/fcntl.c
tools/perf/trace/beauty/flock.c
tools/perf/trace/beauty/futex_op.c
tools/perf/trace/beauty/futex_val3.c
tools/perf/trace/beauty/ioctl.c
tools/perf/trace/beauty/kcmp.c
tools/perf/trace/beauty/kcmp_type.sh
tools/perf/trace/beauty/kvm_ioctl.sh
tools/perf/trace/beauty/madvise_behavior.sh
tools/perf/trace/beauty/mmap.c
tools/perf/trace/beauty/mmap_flags.sh [new file with mode: 0755]
tools/perf/trace/beauty/mode_t.c
tools/perf/trace/beauty/mount_flags.c [new file with mode: 0644]
tools/perf/trace/beauty/mount_flags.sh [new file with mode: 0755]
tools/perf/trace/beauty/msg_flags.c
tools/perf/trace/beauty/open_flags.c
tools/perf/trace/beauty/perf_event_open.c
tools/perf/trace/beauty/perf_ioctl.sh
tools/perf/trace/beauty/pid.c
tools/perf/trace/beauty/pkey_alloc.c
tools/perf/trace/beauty/pkey_alloc_access_rights.sh
tools/perf/trace/beauty/prctl.c
tools/perf/trace/beauty/prctl_option.sh
tools/perf/trace/beauty/sched_policy.c
tools/perf/trace/beauty/seccomp.c
tools/perf/trace/beauty/signum.c
tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
tools/perf/trace/beauty/sockaddr.c
tools/perf/trace/beauty/socket.c
tools/perf/trace/beauty/socket_ipproto.sh
tools/perf/trace/beauty/socket_type.c
tools/perf/trace/beauty/statx.c
tools/perf/trace/beauty/vhost_virtio_ioctl.sh
tools/perf/trace/beauty/waitid_options.c
tools/perf/util/annotate.c
tools/perf/util/auxtrace.c
tools/perf/util/auxtrace.h
tools/perf/util/cs-etm.c
tools/perf/util/env.h
tools/perf/util/event.c
tools/perf/util/evlist.c
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/genelf.h
tools/perf/util/header.c
tools/perf/util/header.h
tools/perf/util/intel-bts.c
tools/perf/util/intel-pt.c
tools/perf/util/machine.c
tools/perf/util/parse-events.c
tools/perf/util/parse-events.h
tools/perf/util/parse-events.l
tools/perf/util/symbol-elf.c
tools/perf/util/symbol.h
tools/perf/util/thread-stack.c
tools/perf/util/thread-stack.h
tools/perf/util/thread.c
tools/perf/util/thread.h
tools/perf/util/unwind-libdw.c
tools/testing/selftests/bpf/flow_dissector_load.c
tools/testing/selftests/bpf/test_skb_cgroup_id.sh
tools/testing/selftests/bpf/test_sock_addr.sh
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
tools/testing/selftests/powerpc/cache_shape/Makefile
tools/testing/selftests/powerpc/pmu/ebb/Makefile
tools/testing/selftests/powerpc/ptrace/Makefile
tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
tools/testing/selftests/powerpc/security/Makefile
tools/testing/selftests/powerpc/security/rfi_flush.c
tools/testing/selftests/powerpc/signal/Makefile
tools/testing/selftests/powerpc/switch_endian/Makefile
tools/testing/selftests/powerpc/utils.c

index a76be45fef6ca5b2d23139304ff5ea338bdd1d07..28fecafa65069c1af077453a4159a6a290949982 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -159,6 +159,7 @@ Peter Oruba <peter@oruba.de>
 Peter Oruba <peter.oruba@amd.com>
 Pratyush Anand <pratyush.anand@gmail.com> <pratyush.anand@st.com>
 Praveen BP <praveenbp@ti.com>
+Punit Agrawal <punitagrawal@gmail.com> <punit.agrawal@arm.com>
 Qais Yousef <qsyousef@gmail.com> <qais.yousef@imgtec.com>
 Oleksij Rempel <linux@rempel-privat.de> <bug-track@fisher-privat.net>
 Oleksij Rempel <linux@rempel-privat.de> <external.Oleksij.Rempel@de.bosch.com>
index fb3d1e03b8819bb950e961172d27dac3a3191129..1e5d172e064624d96216eae51a4be60c02a29979 100644 (file)
@@ -37,8 +37,8 @@ Description:
                  0-|   /             \/             \/
                    +---0----1----2----3----4----5----6------------> time (s)
 
-               2. To make the LED go instantly from one brigntess value to another,
-               we should use use zero-time lengths (the brightness must be same as
+               2. To make the LED go instantly from one brightness value to another,
+               we should use zero-time lengths (the brightness must be same as
                the previous tuple's). So the format should be:
                "brightness_1 duration_1 brightness_1 0 brightness_2 duration_2
                brightness_2 0 ...". For example:
diff --git a/Documentation/ABI/testing/sysfs-platform-lg-laptop b/Documentation/ABI/testing/sysfs-platform-lg-laptop
new file mode 100644 (file)
index 0000000..cf47749
--- /dev/null
@@ -0,0 +1,35 @@
+What:          /sys/devices/platform/lg-laptop/reader_mode
+Date:          October 2018
+KernelVersion: 4.20
+Contact:       "Matan Ziv-Av <matan@svgalib.org>
+Description:
+        Control reader mode. 1 means on, 0 means off.
+
+What:          /sys/devices/platform/lg-laptop/fn_lock
+Date:          October 2018
+KernelVersion: 4.20
+Contact:       "Matan Ziv-Av <matan@svgalib.org>
+Description:
+        Control FN lock mode. 1 means on, 0 means off.
+
+What:          /sys/devices/platform/lg-laptop/battery_care_limit
+Date:          October 2018
+KernelVersion: 4.20
+Contact:       "Matan Ziv-Av <matan@svgalib.org>
+Description:
+        Maximal battery charge level. Accepted values are 80 or 100.
+
+What:          /sys/devices/platform/lg-laptop/fan_mode
+Date:          October 2018
+KernelVersion: 4.20
+Contact:       "Matan Ziv-Av <matan@svgalib.org>
+Description:
+        Control fan mode. 1 for performance mode, 0 for silent mode.
+
+What:          /sys/devices/platform/lg-laptop/usb_charge
+Date:          October 2018
+KernelVersion: 4.20
+Contact:       "Matan Ziv-Av <matan@svgalib.org>
+Description:
+        Control USB port charging when device is turned off.
+        1 means on, 0 means off.
index 8384c681a4b2e0cb88595b0e95eb6660ab5d7857..476722b7b6367ca38bf0e3263f3e132b515dcfd6 100644 (file)
@@ -1879,10 +1879,8 @@ following two functions.
 
   wbc_init_bio(@wbc, @bio)
        Should be called for each bio carrying writeback data and
-       associates the bio with the inode's owner cgroup and the
-       corresponding request queue.  This must be called after
-       a queue (device) has been associated with the bio and
-       before submission.
+       associates the bio with the inode's owner cgroup.  Can be
+       called anytime between bio allocation and submission.
 
   wbc_account_io(@wbc, @page, @bytes)
        Should be called for each data segment being written out.
@@ -1901,7 +1899,7 @@ the configuration, the bio may be executed at a lower priority and if
 the writeback session is holding shared resources, e.g. a journal
 entry, may lead to priority inversion.  There is no one easy solution
 for the problem.  Filesystems can try to work around specific problem
-cases by skipping wbc_init_bio() or using bio_associate_create_blkg()
+cases by skipping wbc_init_bio() or using bio_associate_blkcg()
 directly.
 
 
index b90fe3b6bc6c78555d5470bbfec95fd7bd691063..81d1d5a7472804e50a663480ef92dc67abc71ab5 100644 (file)
                        earlyprintk=serial[,0x...[,baudrate]]
                        earlyprintk=ttySn[,baudrate]
                        earlyprintk=dbgp[debugController#]
-                       earlyprintk=pciserial,bus:device.function[,baudrate]
+                       earlyprintk=pciserial[,force],bus:device.function[,baudrate]
                        earlyprintk=xdbc[xhciController#]
 
                        earlyprintk is useful when the kernel crashes before
 
                        The sclp output can only be used on s390.
 
+                       The optional "force" to "pciserial" enables use of a
+                       PCI device even when its classcode is not of the
+                       UART class.
+
        edac_report=    [HW,EDAC] Control how to report EDAC event
                        Format: {"on" | "off" | "force"}
                        on: enable EDAC to report H/W event. May be overridden
index 5969bf42562a8752535f3183de6a9b66fa07985b..8763866b11cfd0f6af865ed67d8ba6db75a48b60 100644 (file)
@@ -183,6 +183,10 @@ and looks like the following:
 
                void (*describe)(const struct key *key, struct seq_file *m);
                void (*destroy)(void *payload);
+               int (*query)(const struct kernel_pkey_params *params,
+                            struct kernel_pkey_query *info);
+               int (*eds_op)(struct kernel_pkey_params *params,
+                             const void *in, void *out);
                int (*verify_signature)(const struct key *key,
                                        const struct public_key_signature *sig);
        };
@@ -207,12 +211,22 @@ There are a number of operations defined by the subtype:
      asymmetric key will look after freeing the fingerprint and releasing the
      reference on the subtype module.
 
- (3) verify_signature().
+ (3) query().
 
-     Optional.  These are the entry points for the key usage operations.
-     Currently there is only the one defined.  If not set, the caller will be
-     given -ENOTSUPP.  The subtype may do anything it likes to implement an
-     operation, including offloading to hardware.
+     Mandatory.  This is a function for querying the capabilities of a key.
+
+ (4) eds_op().
+
+     Optional.  This is the entry point for the encryption, decryption and
+     signature creation operations (which are distinguished by the operation ID
+     in the parameter struct).  The subtype may do anything it likes to
+     implement an operation, including offloading to hardware.
+
+ (5) verify_signature().
+
+     Optional.  This is the entry point for signature verification.  The
+     subtype may do anything it likes to implement an operation, including
+     offloading to hardware.
 
 
 ==========================
@@ -234,6 +248,8 @@ Examples of blob formats for which parsers could be implemented include:
  - X.509 ASN.1 stream.
  - Pointer to TPM key.
  - Pointer to UEFI key.
+ - PKCS#8 private key [RFC 5208].
+ - PKCS#5 encrypted private key [RFC 2898].
 
 During key instantiation each parser in the list is tried until one doesn't
 return -EBADMSG.
index 9b5685a1d15d9821efb9dd6d34a29ae1f788e31f..84262cdb8d29ae3f95ef94f6e7b9900897774c17 100644 (file)
@@ -59,9 +59,11 @@ mhz values (normalized w.r.t. the highest value found while parsing the DT).
 ===========================================
 
 Example 1 (ARM 64-bit, 6-cpu system, two clusters):
-capacities-dmips-mhz are scaled w.r.t. 1024 (cpu@0 and cpu@1)
-supposing cluster0@max-freq=1100 and custer1@max-freq=850,
-final capacities are 1024 for cluster0 and 446 for cluster1
+The capacities-dmips-mhz or DMIPS/MHz values (scaled to 1024)
+are 1024 and 578 for cluster0 and cluster1. Further normalization
+is done by the operating system based on cluster0@max-freq=1100 and
+cluster1@max-freq=850, final capacities are 1024 for cluster0 and
+446 for cluster1 (578*850/1100).
 
 cpus {
        #address-cells = <2>;
index f5e0f82fd5031efb1570361eabf2d096769ef7f5..58c4256d37a39e5082cdb5f354548dbe0cf160d6 100644 (file)
@@ -27,7 +27,7 @@ SoCs:
     compatible = "renesas,r8a77470"
   - RZ/G2M (R8A774A1)
     compatible = "renesas,r8a774a1"
-  - RZ/G2E (RA8774C0)
+  - RZ/G2E (R8A774C0)
     compatible = "renesas,r8a774c0"
   - R-Car M1A (R8A77781)
     compatible = "renesas,r8a7778"
diff --git a/Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.txt b/Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.txt
new file mode 100644 (file)
index 0000000..513f034
--- /dev/null
@@ -0,0 +1,22 @@
+Innolux P120ZDG-BF1 12.02 inch eDP 2K display panel
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
+
+Required properties:
+- compatible: should be "innolux,p120zdg-bf1"
+- power-supply: regulator to provide the supply voltage
+
+Optional properties:
+- enable-gpios: GPIO pin to enable or disable the panel
+- backlight: phandle of the backlight device attached to the panel
+- no-hpd: If HPD isn't hooked up, add this property.
+
+Example:
+       panel_edp: panel-edp {
+               compatible = "innolux,p120zdg-bf1";
+               enable-gpios = <&msmgpio 31 GPIO_ACTIVE_LOW>;
+               power-supply = <&pm8916_l2>;
+               backlight = <&backlight>;
+               no-hpd;
+       };
diff --git a/Documentation/devicetree/bindings/display/panel/innolux,tv123wam.txt b/Documentation/devicetree/bindings/display/panel/innolux,tv123wam.txt
deleted file mode 100644 (file)
index a9b3526..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-Innolux TV123WAM 12.3 inch eDP 2K display panel
-
-This binding is compatible with the simple-panel binding, which is specified
-in simple-panel.txt in this directory.
-
-Required properties:
-- compatible: should be "innolux,tv123wam"
-- power-supply: regulator to provide the supply voltage
-
-Optional properties:
-- enable-gpios: GPIO pin to enable or disable the panel
-- backlight: phandle of the backlight device attached to the panel
-
-Example:
-       panel_edp: panel-edp {
-               compatible = "innolux,tv123wam";
-               enable-gpios = <&msmgpio 31 GPIO_ACTIVE_LOW>;
-               power-supply = <&pm8916_l2>;
-               backlight = <&backlight>;
-       };
index 45a457ad38f0f078eed709424e1e237ebcfe420f..b2b872c710f24d69996eb3cb4922ba8dd3915a14 100644 (file)
@@ -11,6 +11,9 @@ Optional properties:
 - ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing
 - enable-gpios: GPIO pin to enable or disable the panel
 - backlight: phandle of the backlight device attached to the panel
+- no-hpd: This panel is supposed to communicate that it's ready via HPD
+  (hot plug detect) signal, but the signal isn't hooked up so we should
+  hardcode the max delay from the panel spec when powering up the panel.
 
 Example:
 
index 091c8dfd322910e14712d4a818e9879538abf3d9..b245363d6d60a0b6a42a6a4d675e1219faedc03d 100644 (file)
@@ -3,6 +3,7 @@
 Required properties:
 - compatible :
   - "fsl,imx7ulp-lpi2c" for LPI2C compatible with the one integrated on i.MX7ULP soc
+  - "fsl,imx8qxp-lpi2c" for LPI2C compatible with the one integrated on i.MX8QXP soc
 - reg : address and length of the lpi2c master registers
 - interrupts : lpi2c interrupt
 - clocks : lpi2c clock specifier
index 7e49839d41249ca5168b0de1ea02781a2798486d..4b90ba9f31b70b712c285af7ef7f7be180b39b14 100644 (file)
@@ -1,8 +1,12 @@
 I2C for OMAP platforms
 
 Required properties :
-- compatible : Must be "ti,omap2420-i2c", "ti,omap2430-i2c", "ti,omap3-i2c"
-  or "ti,omap4-i2c"
+- compatible : Must be
+       "ti,omap2420-i2c" for OMAP2420 SoCs
+       "ti,omap2430-i2c" for OMAP2430 SoCs
+       "ti,omap3-i2c" for OMAP3 SoCs
+       "ti,omap4-i2c" for OMAP4+ SoCs
+       "ti,am654-i2c", "ti,omap4-i2c" for AM654 SoCs
 - ti,hwmods : Must be "i2c<n>", n being the instance number (1-based)
 - #address-cells = <1>;
 - #size-cells = <0>;
index 06a363d9ccef9069124ae813bfb449d8cab182c7..b9a1d7402128b95437341b1e2d16d516fddfa114 100644 (file)
@@ -7,6 +7,7 @@ Required properties:
   for da850  - compatible = "ti,da850-ecap", "ti,am3352-ecap", "ti,am33xx-ecap";
   for dra746 - compatible = "ti,dra746-ecap", "ti,am3352-ecap";
   for 66ak2g - compatible = "ti,k2g-ecap", "ti,am3352-ecap";
+  for am654  - compatible = "ti,am654-ecap", "ti,am3352-ecap";
 - #pwm-cells: should be 3. See pwm.txt in this directory for a description of
   the cells format. The PWM channel index ranges from 0 to 4. The only third
   cell flag supported by this binding is PWM_POLARITY_INVERTED.
index e1ef6afbe3a74a89d9040b9d6642cd2c22aa2a0c..7f31fe7e209348ceb04a471e08be0f60b4fff65d 100644 (file)
@@ -3,7 +3,9 @@
 Required Properties:
 - compatible: should be "renesas,pwm-rcar" and one of the following.
  - "renesas,pwm-r8a7743": for RZ/G1M
+ - "renesas,pwm-r8a7744": for RZ/G1N
  - "renesas,pwm-r8a7745": for RZ/G1E
+ - "renesas,pwm-r8a774a1": for RZ/G2M
  - "renesas,pwm-r8a7778": for R-Car M1A
  - "renesas,pwm-r8a7779": for R-Car H1
  - "renesas,pwm-r8a7790": for R-Car H2
@@ -12,6 +14,8 @@ Required Properties:
  - "renesas,pwm-r8a7795": for R-Car H3
  - "renesas,pwm-r8a7796": for R-Car M3-W
  - "renesas,pwm-r8a77965": for R-Car M3-N
+ - "renesas,pwm-r8a77970": for R-Car V3M
+ - "renesas,pwm-r8a77980": for R-Car V3H
  - "renesas,pwm-r8a77990": for R-Car E3
  - "renesas,pwm-r8a77995": for R-Car D3
 - reg: base address and length of the registers block for the PWM.
index d53a16715da6ac33dea19a4a66ad38310feacc95..848a92b53d810eeb055544cbb037a38a54e05371 100644 (file)
@@ -2,13 +2,19 @@
 
 Required Properties:
 
-  - compatible: should be one of the following.
+  - compatible: must contain one or more of the following:
     - "renesas,tpu-r8a73a4": for R8A73A4 (R-Mobile APE6) compatible PWM controller.
     - "renesas,tpu-r8a7740": for R8A7740 (R-Mobile A1) compatible PWM controller.
     - "renesas,tpu-r8a7743": for R8A7743 (RZ/G1M) compatible PWM controller.
+    - "renesas,tpu-r8a7744": for R8A7744 (RZ/G1N) compatible PWM controller.
     - "renesas,tpu-r8a7745": for R8A7745 (RZ/G1E) compatible PWM controller.
     - "renesas,tpu-r8a7790": for R8A7790 (R-Car H2) compatible PWM controller.
-    - "renesas,tpu": for generic R-Car and RZ/G1 TPU PWM controller.
+    - "renesas,tpu-r8a77970": for R8A77970 (R-Car V3M) compatible PWM
+                             controller.
+    - "renesas,tpu-r8a77980": for R8A77980 (R-Car V3H) compatible PWM
+                             controller.
+    - "renesas,tpu": for the generic TPU PWM controller; this is a fallback for
+                    the entries listed above.
 
   - reg: Base address and length of each memory resource used by the PWM
     controller hardware module.
diff --git a/Documentation/devicetree/bindings/timer/csky,gx6605s-timer.txt b/Documentation/devicetree/bindings/timer/csky,gx6605s-timer.txt
new file mode 100644 (file)
index 0000000..6b04344
--- /dev/null
@@ -0,0 +1,42 @@
+=================
+gx6605s SOC Timer
+=================
+
+The timer is used in the gx6605s SoC as the system timer and the driver
+contains a clock event and a clock source.
+
+==============================
+timer node bindings definition
+==============================
+
+       Description: Describes gx6605s SOC timer
+
+       PROPERTIES
+
+       - compatible
+               Usage: required
+               Value type: <string>
+               Definition: must be "csky,gx6605s-timer"
+       - reg
+               Usage: required
+               Value type: <u32 u32>
+               Definition: <phyaddr size> in soc from cpu view
+       - clocks
+               Usage: required
+               Value type: phandle + clock specifier cells
+               Definition: must be input clk node
+       - interrupts
+               Usage: required
+               Value type: <u32>
+               Definition: must be timer irq num defined by soc
+
+Examples:
+---------
+
+       timer0: timer@20a000 {
+               compatible = "csky,gx6605s-timer";
+               reg = <0x0020a000 0x400>;
+               clocks = <&dummy_apb_clk>;
+               interrupts = <10>;
+               interrupt-parent = <&intc>;
+       };
diff --git a/Documentation/devicetree/bindings/timer/csky,mptimer.txt b/Documentation/devicetree/bindings/timer/csky,mptimer.txt
new file mode 100644 (file)
index 0000000..15cfec0
--- /dev/null
@@ -0,0 +1,42 @@
+============================
+C-SKY Multi-processors Timer
+============================
+
+C-SKY multi-processors timer is designed for C-SKY SMP system and the
+regs are accessed by cpu co-processor 4 registers with mtcr/mfcr.
+
+ - PTIM_CTLR "cr<0, 14>" Control reg to start reset timer.
+ - PTIM_TSR  "cr<1, 14>" Interrupt cleanup status reg.
+ - PTIM_CCVR "cr<3, 14>" Current counter value reg.
+ - PTIM_LVR  "cr<6, 14>" Window value reg to trigger next event.
+
+==============================
+timer node bindings definition
+==============================
+
+       Description: Describes SMP timer
+
+       PROPERTIES
+
+       - compatible
+               Usage: required
+               Value type: <string>
+               Definition: must be "csky,mptimer"
+       - clocks
+               Usage: required
+               Value type: <node>
+               Definition: must be input clk node
+       - interrupts
+               Usage: required
+               Value type: <u32>
+               Definition: must be timer irq num defined by soc
+
+Examples:
+---------
+
+       timer: timer {
+               compatible = "csky,mptimer";
+               clocks = <&dummy_apb_clk>;
+               interrupts = <16>;
+               interrupt-parent = <&intc>;
+       };
index 51c136c821bfb0a190e7daa67ebdc2e6faaf878b..eef7d9d259e8570d102be8c7f1641158950262c2 100644 (file)
@@ -286,6 +286,12 @@ pointed by REDIRECT. This should not be possible on local system as setting
 "trusted." xattrs will require CAP_SYS_ADMIN. But it should be possible
 for untrusted layers like from a pen drive.
 
+Note: redirect_dir={off|nofollow|follow(*)} conflicts with metacopy=on, and
+results in an error.
+
+(*) redirect_dir=follow only conflicts with metacopy=on if upperdir=... is
+given.
+
 Sharing and copying layers
 --------------------------
 
index 321d74b73937231d0dbf927ec89689082d14fcce..cf43bc4dbf319b4f642feaea0608d3bd07b075d6 100644 (file)
@@ -623,6 +623,11 @@ in your dentry operations instead.
        On success you get a new struct file sharing the mount/dentry with the
        original, on failure - ERR_PTR().
 --
+[mandatory]
+       ->clone_file_range() and ->dedupe_file_range have been replaced with
+       ->remap_file_range().  See Documentation/filesystems/vfs.txt for more
+       information.
+--
 [recommended]
        ->lookup() instances doing an equivalent of
                if (IS_ERR(inode))
diff --git a/Documentation/filesystems/ubifs-authentication.md b/Documentation/filesystems/ubifs-authentication.md
new file mode 100644 (file)
index 0000000..028b3e2
--- /dev/null
@@ -0,0 +1,426 @@
+% UBIFS Authentication
+% sigma star gmbh
+% 2018
+
+# Introduction
+
+UBIFS utilizes the fscrypt framework to provide confidentiality for file
+contents and file names. This prevents attacks where an attacker is able to
+read contents of the filesystem on a single point in time. A classic example
+is a lost smartphone where the attacker is unable to read personal data stored
+on the device without the filesystem decryption key.
+
+At the current state, UBIFS encryption however does not prevent attacks where
+the attacker is able to modify the filesystem contents and the user uses the
+device afterwards. In such a scenario an attacker can modify filesystem
+contents arbitrarily without the user noticing. One example is to modify a
+binary to perform a malicious action when executed [DMC-CBC-ATTACK]. Since
+most of the filesystem metadata of UBIFS is stored in plain, this makes it
+fairly easy to swap files and replace their contents.
+
+Other full disk encryption systems like dm-crypt cover all filesystem metadata,
+which makes such kinds of attacks more complicated, but not impossible.
+Especially, if the attacker is given access to the device multiple points in
+time. For dm-crypt and other filesystems that build upon the Linux block IO
+layer, the dm-integrity or dm-verity subsystems [DM-INTEGRITY, DM-VERITY]
+can be used to get full data authentication at the block layer.
+These can also be combined with dm-crypt [CRYPTSETUP2].
+
+This document describes an approach to get file contents _and_ full metadata
+authentication for UBIFS. Since UBIFS uses fscrypt for file contents and file
+name encryption, the authentication system could be tied into fscrypt such that
+existing features like key derivation can be utilized. It should however also
+be possible to use UBIFS authentication without using encryption.
+
+
+## MTD, UBI & UBIFS
+
+On Linux, the MTD (Memory Technology Devices) subsystem provides a uniform
+interface to access raw flash devices. One of the more prominent subsystems that
+work on top of MTD is UBI (Unsorted Block Images). It provides volume management
+for flash devices and is thus somewhat similar to LVM for block devices. In
+addition, it deals with flash-specific wear-leveling and transparent I/O error
+handling. UBI offers logical erase blocks (LEBs) to the layers on top of it
+and maps them transparently to physical erase blocks (PEBs) on the flash.
+
+UBIFS is a filesystem for raw flash which operates on top of UBI. Thus, wear
+leveling and some flash specifics are left to UBI, while UBIFS focuses on
+scalability, performance and recoverability.
+
+
+
+       +------------+ +*******+ +-----------+ +-----+
+       |            | * UBIFS * | UBI-BLOCK | | ... |
+       | JFFS/JFFS2 | +*******+ +-----------+ +-----+
+       |            | +-----------------------------+ +-----------+ +-----+
+       |            | |              UBI            | | MTD-BLOCK | | ... |
+       +------------+ +-----------------------------+ +-----------+ +-----+
+       +------------------------------------------------------------------+
+       |                  MEMORY TECHNOLOGY DEVICES (MTD)                 |
+       +------------------------------------------------------------------+
+       +-----------------------------+ +--------------------------+ +-----+
+       |         NAND DRIVERS        | |        NOR DRIVERS       | | ... |
+       +-----------------------------+ +--------------------------+ +-----+
+
+            Figure 1: Linux kernel subsystems for dealing with raw flash
+
+
+
+Internally, UBIFS maintains multiple data structures which are persisted on
+the flash:
+
+- *Index*: an on-flash B+ tree where the leaf nodes contain filesystem data
+- *Journal*: an additional data structure to collect FS changes before updating
+  the on-flash index and reduce flash wear.
+- *Tree Node Cache (TNC)*: an in-memory B+ tree that reflects the current FS
+  state to avoid frequent flash reads. It is basically the in-memory
+  representation of the index, but contains additional attributes.
+- *LEB property tree (LPT)*: an on-flash B+ tree for free space accounting per
+  UBI LEB.
+
+In the remainder of this section we will cover the on-flash UBIFS data
+structures in more detail. The TNC is of less importance here since it is never
+persisted onto the flash directly. More details on UBIFS can also be found in
+[UBIFS-WP].
+
+
+### UBIFS Index & Tree Node Cache
+
+Basic on-flash UBIFS entities are called *nodes*. UBIFS knows different types
+of nodes. Eg. data nodes (`struct ubifs_data_node`) which store chunks of file
+contents or inode nodes (`struct ubifs_ino_node`) which represent VFS inodes.
+Almost all types of nodes share a common header (`ubifs_ch`) containing basic
+information like node type, node length, a sequence number, etc. (see
+`fs/ubifs/ubifs-media.h` in kernel source). Exceptions are entries of the LPT
+and some less important node types like padding nodes which are used to pad
+unusable content at the end of LEBs.
+
+To avoid re-writing the whole B+ tree on every single change, it is implemented
+as *wandering tree*, where only the changed nodes are re-written and previous
+versions of them are obsoleted without erasing them right away. As a result,
+the index is not stored in a single place on the flash, but *wanders* around
+and there are obsolete parts on the flash as long as the LEB containing them is
+not reused by UBIFS. To find the most recent version of the index, UBIFS stores
+a special node called *master node* into UBI LEB 1 which always points to the
+most recent root node of the UBIFS index. For recoverability, the master node
+is additionally duplicated to LEB 2. Mounting UBIFS is thus a simple read of
+LEB 1 and 2 to get the current master node and from there get the location of
+the most recent on-flash index.
+
+The TNC is the in-memory representation of the on-flash index. It contains some
+additional runtime attributes per node which are not persisted. One of these is
+a dirty-flag which marks nodes that have to be persisted the next time the
+index is written onto the flash. The TNC acts as a write-back cache and all
+modifications of the on-flash index are done through the TNC. Like other caches,
+the TNC does not have to mirror the full index into memory, but reads parts of
+it from flash whenever needed. A *commit* is the UBIFS operation of updating the
+on-flash filesystem structures like the index. On every commit, the TNC nodes
+marked as dirty are written to the flash to update the persisted index.
+
+
+### Journal
+
+To avoid wearing out the flash, the index is only persisted (*committed*) when
+certain conditions are met (eg. `fsync(2)`). The journal is used to record
+any changes (in form of inode nodes, data nodes etc.) between commits
+of the index. During mount, the journal is read from the flash and replayed
+onto the TNC (which will be created on-demand from the on-flash index).
+
+UBIFS reserves a bunch of LEBs just for the journal called *log area*. The
+amount of log area LEBs is configured on filesystem creation (using
+`mkfs.ubifs`) and stored in the superblock node. The log area contains only
+two types of nodes: *reference nodes* and *commit start nodes*. A commit start
+node is written whenever an index commit is performed. Reference nodes are
+written on every journal update. Each reference node points to the position of
+other nodes (inode nodes, data nodes etc.) on the flash that are part of this
+journal entry. These nodes are called *buds* and describe the actual filesystem
+changes including their data.
+
+The log area is maintained as a ring. Whenever the journal is almost full,
+a commit is initiated. This also writes a commit start node so that during
+mount, UBIFS will seek for the most recent commit start node and just replay
+every reference node after that. Every reference node before the commit start
+node will be ignored as they are already part of the on-flash index.
+
+When writing a journal entry, UBIFS first ensures that enough space is
+available to write the reference node and buds part of this entry. Then, the
+reference node is written and afterwards the buds describing the file changes.
+On replay, UBIFS will record every reference node and inspect the location of
+the referenced LEBs to discover the buds. If these are corrupt or missing,
+UBIFS will attempt to recover them by re-reading the LEB. This is however only
+done for the last referenced LEB of the journal. Only this can become corrupt
+because of a power cut. If the recovery fails, UBIFS will not mount. An error
+for every other LEB will directly cause UBIFS to fail the mount operation.
+
+
+       | ----    LOG AREA     ---- | ----------    MAIN AREA    ------------ |
+
+        -----+------+-----+--------+----   ------+-----+-----+---------------
+        \    |      |     |        |   /  /      |     |     |               \
+        / CS |  REF | REF |        |   \  \ DENT | INO | INO |               /
+        \    |      |     |        |   /  /      |     |     |               \
+         ----+------+-----+--------+---   -------+-----+-----+----------------
+                 |     |                  ^            ^
+                 |     |                  |            |
+                 +------------------------+            |
+                       |                               |
+                       +-------------------------------+
+
+
+                Figure 2: UBIFS flash layout of log area with commit start nodes
+                          (CS) and reference nodes (REF) pointing to main area
+                          containing their buds
+
+
+### LEB Property Tree/Table
+
+The LEB property tree is used to store per-LEB information. This includes the
+LEB type and amount of free and *dirty* (old, obsolete content) space [1] on
+the LEB. The type is important, because UBIFS never mixes index nodes with data
+nodes on a single LEB and thus each LEB has a specific purpose. This again is
+useful for free space calculations. See [UBIFS-WP] for more details.
+
+The LEB property tree again is a B+ tree, but it is much smaller than the
+index. Due to its smaller size it is always written as one chunk on every
+commit. Thus, saving the LPT is an atomic operation.
+
+
+[1] Since LEBs can only be appended and never overwritten, there is a
+difference between free space ie. the remaining space left on the LEB to be
+written to without erasing it and previously written content that is obsolete
+but can't be overwritten without erasing the full LEB.
+
+
+# UBIFS Authentication
+
+This chapter introduces UBIFS authentication which enables UBIFS to verify
+the authenticity and integrity of metadata and file contents stored on flash.
+
+
+## Threat Model
+
+UBIFS authentication enables detection of offline data modification. While it
+does not prevent it, it enables (trusted) code to check the integrity and
+authenticity of on-flash file contents and filesystem metadata. This covers
+attacks where file contents are swapped.
+
+UBIFS authentication will not protect against rollback of full flash contents.
+Ie. an attacker can still dump the flash and restore it at a later time without
+detection. It will also not protect against partial rollback of individual
+index commits. That means that an attacker is able to partially undo changes.
+This is possible because UBIFS does not immediately overwrite obsolete
+versions of the index tree or the journal, but instead marks them as obsolete
+and garbage collection erases them at a later time. An attacker can use this by
+erasing parts of the current tree and restoring old versions that are still on
+the flash and have not yet been erased. This is possible, because every commit
+will always write a new version of the index root node and the master node
+without overwriting the previous version. This is further helped by the
+wear-leveling operations of UBI which copies contents from one physical
+eraseblock to another and does not atomically erase the first eraseblock.
+
+UBIFS authentication does not cover attacks where an attacker is able to
+execute code on the device after the authentication key was provided.
+Additional measures like secure boot and trusted boot have to be taken to
+ensure that only trusted code is executed on a device.
+
+
+## Authentication
+
+To be able to fully trust data read from flash, all UBIFS data structures
+stored on flash are authenticated. That is:
+
+- The index which includes file contents, file metadata like extended
+  attributes, file length etc.
+- The journal which also contains file contents and metadata by recording changes
+  to the filesystem
+- The LPT which stores UBI LEB metadata which UBIFS uses for free space accounting
+
+
+### Index Authentication
+
+Through UBIFS' concept of a wandering tree, it already takes care of only
+updating and persisting changed parts from leaf node up to the root node
+of the full B+ tree. This enables us to augment the index nodes of the tree
+with a hash over each node's child nodes. As a result, the index is basically also
+a Merkle tree. Since the leaf nodes of the index contain the actual filesystem
+data, the hashes of their parent index nodes thus cover all the file contents
+and file metadata. When a file changes, the UBIFS index is updated accordingly
+from the leaf nodes up to the root node including the master node. This process
+can be hooked to recompute the hash only for each changed node at the same time.
+Whenever a file is read, UBIFS can verify the hashes from each leaf node up to
+the root node to ensure the node's integrity.
+
+To ensure the authenticity of the whole index, the UBIFS master node stores a
+keyed hash (HMAC) over its own contents and a hash of the root node of the index
+tree. As mentioned above, the master node is always written to the flash whenever
+the index is persisted (ie. on index commit).
+
+Using this approach only UBIFS index nodes and the master node are changed to
+include a hash. All other types of nodes will remain unchanged. This reduces
+the storage overhead which is precious for users of UBIFS (ie. embedded
+devices).
+
+
+                             +---------------+
+                             |  Master Node  |
+                             |    (hash)     |
+                             +---------------+
+                                     |
+                                     v
+                            +-------------------+
+                            |  Index Node #1    |
+                            |                   |
+                            | branch0   branchn |
+                            | (hash)    (hash)  |
+                            +-------------------+
+                               |    ...   |  (fanout: 8)
+                               |          |
+                       +-------+          +------+
+                       |                         |
+                       v                         v
+            +-------------------+       +-------------------+
+            |  Index Node #2    |       |  Index Node #3    |
+            |                   |       |                   |
+            | branch0   branchn |       | branch0   branchn |
+            | (hash)    (hash)  |       | (hash)    (hash)  |
+            +-------------------+       +-------------------+
+                 |   ...                     |   ...   |
+                 v                           v         v
+               +-----------+         +----------+  +-----------+
+               | Data Node |         | INO Node |  | DENT Node |
+               +-----------+         +----------+  +-----------+
+
+
+           Figure 3: Coverage areas of index node hash and master node HMAC
+
+
+
+The most important part for robustness and power-cut safety is to atomically
+persist the hash and file contents. Here the existing UBIFS logic for how
+changed nodes are persisted is already designed for this purpose such that
+UBIFS can safely recover if a power-cut occurs while persisting. Adding
+hashes to index nodes does not change this since each hash will be persisted
+atomically together with its respective node.
+
+
+### Journal Authentication
+
+The journal is authenticated too. Since the journal is continuously written,
+it is necessary to also add authentication information frequently to the
+journal so that in case of a powercut not too much data is left unauthenticated.
+This is done by creating a continuous hash beginning from the commit start node
+over the previous reference nodes, the current reference node, and the bud
+nodes. From time to time whenever it is suitable authentication nodes are added
+between the bud nodes. This new node type contains a HMAC over the current state
+of the hash chain. That way a journal can be authenticated up to the last
+authentication node. The tail of the journal which may not have an authentication
+node cannot be authenticated and is skipped during journal replay.
+
+We get this picture for journal authentication:
+
+    ,,,,,,,,
+    ,......,...........................................
+    ,. CS  ,               hash1.----.           hash2.----.
+    ,.  |  ,                    .    |hmac            .    |hmac
+    ,.  v  ,                    .    v                .    v
+    ,.REF#0,-> bud -> bud -> bud.-> auth -> bud -> bud.-> auth ...
+    ,..|...,...........................................
+    ,  |   ,
+    ,  |   ,,,,,,,,,,,,,,,
+    .  |            hash3,----.
+    ,  |                 ,    |hmac
+    ,  v                 ,    v
+    , REF#1 -> bud -> bud,-> auth ...
+    ,,,|,,,,,,,,,,,,,,,,,,
+       v
+      REF#2 -> ...
+       |
+       V
+      ...
+
+Since the hash also includes the reference nodes an attacker cannot reorder or
+skip any journal heads for replay. An attacker can only remove bud nodes or
+reference nodes from the end of the journal, effectively rewinding the
+filesystem at maximum back to the last commit.
+
+The location of the log area is stored in the master node. Since the master
+node is authenticated with a HMAC as described above, it is not possible to
+tamper with that without detection. The size of the log area is specified when
+the filesystem is created using `mkfs.ubifs` and stored in the superblock node.
+To avoid tampering with this and other values stored there, a HMAC is added to
+the superblock struct. The superblock node is stored in LEB 0 and is only
+modified on feature flag or similar changes, but never on file changes.
+
+
+### LPT Authentication
+
+The location of the LPT root node on the flash is stored in the UBIFS master
+node. Since the LPT is written and read atomically on every commit, there is
+no need to authenticate individual nodes of the tree. It suffices to
+protect the integrity of the full LPT by a simple hash stored in the master
+node. Since the master node itself is authenticated, the LPT's authenticity can
+be verified by verifying the authenticity of the master node and comparing the
+LPT hash stored there with the hash computed from the read on-flash LPT.
+
+
+## Key Management
+
+For simplicity, UBIFS authentication uses a single key to compute the HMACs
+of superblock, master, commit start and reference nodes. This key has to be
+available on creation of the filesystem (`mkfs.ubifs`) to authenticate the
+superblock node. Further, it has to be available on mount of the filesystem
+to verify authenticated nodes and generate new HMACs for changes.
+
+UBIFS authentication is intended to operate side-by-side with UBIFS encryption
+(fscrypt) to provide confidentiality and authenticity. Since UBIFS encryption
+has a different approach of encryption policies per directory, there can be
+multiple fscrypt master keys and there might be folders without encryption.
+UBIFS authentication on the other hand has an all-or-nothing approach in the
+sense that it either authenticates everything of the filesystem or nothing.
+Because of this and because UBIFS authentication should also be usable without
+encryption, it does not share the same master key with fscrypt, but manages
+a dedicated authentication key.
+
+The API for providing the authentication key has yet to be defined, but the
+key can eg. be provided by userspace through a keyring similar to the way it
+is currently done in fscrypt. It should however be noted that the current
+fscrypt approach has shown its flaws and the userspace API will eventually
+change [FSCRYPT-POLICY2].
+
+Nevertheless, it will be possible for a user to provide a single passphrase
+or key in userspace that covers UBIFS authentication and encryption. This can
+be solved by the corresponding userspace tools which derive a second key for
+authentication in addition to the derived fscrypt master key used for
+encryption.
+
+To be able to check if the proper key is available on mount, the UBIFS
+superblock node will additionally store a hash of the authentication key. This
+approach is similar to the approach proposed for fscrypt encryption policy v2
+[FSCRYPT-POLICY2].
+
+
+# Future Extensions
+
+In certain cases where a vendor wants to provide an authenticated filesystem
+image to customers, it should be possible to do so without sharing the secret
+UBIFS authentication key. Instead, in addition to each HMAC a digital
+signature could be stored where the vendor shares the public key alongside the
+filesystem image. In case this filesystem has to be modified afterwards,
+UBIFS can exchange all digital signatures with HMACs on first mount similar
+to the way the IMA/EVM subsystem deals with such situations. The HMAC key
+will then have to be provided beforehand in the normal way.
+
+
+# References
+
+[CRYPTSETUP2]        http://www.saout.de/pipermail/dm-crypt/2017-November/005745.html
+
+[DMC-CBC-ATTACK]     http://www.jakoblell.com/blog/2013/12/22/practical-malleability-attack-against-cbc-encrypted-luks-partitions/
+
+[DM-INTEGRITY]       https://www.kernel.org/doc/Documentation/device-mapper/dm-integrity.txt
+
+[DM-VERITY]          https://www.kernel.org/doc/Documentation/device-mapper/verity.txt
+
+[FSCRYPT-POLICY2]    https://www.spinics.net/lists/linux-ext4/msg58710.html
+
+[UBIFS-WP]           http://www.linux-mtd.infradead.org/doc/ubifs_whitepaper.pdf
index a0a61d2f389f409602d2ac266e7e803c22f74406..acc80442a3bbecc97c6d5ba9b516cb37cb3a7478 100644 (file)
@@ -91,6 +91,13 @@ chk_data_crc         do not skip checking CRCs on data nodes
 compr=none              override default compressor and set it to "none"
 compr=lzo               override default compressor and set it to "lzo"
 compr=zlib              override default compressor and set it to "zlib"
+auth_key=              specify the key used for authenticating the filesystem.
+                       Passing this option makes authentication mandatory.
+                       The passed key must be present in the kernel keyring
+                       and must be of type 'logon'
+auth_hash_name=                The hash algorithm used for authentication. Used for
+                       both hashing and for creating HMACs. Typical values
+                       include "sha256" or "sha512"
 
 
 Quick usage instructions
index a6c6a8af48a296cf9b7197c8f065370814efd90d..5f71a252e2e0f52b17c4fb6076baa57ae34e1ec4 100644 (file)
@@ -883,8 +883,9 @@ struct file_operations {
        unsigned (*mmap_capabilities)(struct file *);
 #endif
        ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
-       int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
-       int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
+       loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
+                                  struct file *file_out, loff_t pos_out,
+                                  loff_t len, unsigned int remap_flags);
        int (*fadvise)(struct file *, loff_t, loff_t, int);
 };
 
@@ -960,11 +961,18 @@ otherwise noted.
 
   copy_file_range: called by the copy_file_range(2) system call.
 
-  clone_file_range: called by the ioctl(2) system call for FICLONERANGE and
-       FICLONE commands.
-
-  dedupe_file_range: called by the ioctl(2) system call for FIDEDUPERANGE
-       command.
+  remap_file_range: called by the ioctl(2) system call for FICLONERANGE and
+       FICLONE and FIDEDUPERANGE commands to remap file ranges.  An
+       implementation should remap len bytes at pos_in of the source file into
+       the dest file at pos_out.  Implementations must handle callers passing
+       in len == 0; this means "remap to the end of the source file".  The
+       return value should be the number of bytes remapped, or the usual
+       negative error code if errors occurred before any bytes were remapped.
+       The remap_flags parameter accepts REMAP_FILE_* flags.  If
+       REMAP_FILE_DEDUP is set then the implementation must only remap if the
+       requested file ranges have identical contents.  If REMAP_CAN_SHORTEN is
+       set, the caller is ok with the implementation shortening the request
+       length to satisfy alignment or EOF requirements (or any other reason).
 
   fadvise: possibly called by the fadvise64() system call.
 
diff --git a/Documentation/i2c/busses/i2c-nvidia-gpu b/Documentation/i2c/busses/i2c-nvidia-gpu
new file mode 100644 (file)
index 0000000..31884d2
--- /dev/null
@@ -0,0 +1,18 @@
+Kernel driver i2c-nvidia-gpu
+
+Datasheet: not publicly available.
+
+Authors:
+       Ajay Gupta <ajayg@nvidia.com>
+
+Description
+-----------
+
+i2c-nvidia-gpu is a driver for I2C controller included in NVIDIA Turing
+and later GPUs and it is used to communicate with Type-C controller on GPUs.
+
+If your 'lspci -v' listing shows something like the following,
+
+01:00.3 Serial bus controller [0c80]: NVIDIA Corporation Device 1ad9 (rev a1)
+
+then this driver should support the I2C controller of your GPU.
index 7b6a2b2bdc98db2e794a261ff7be17dc3df0ae26..8da26c6dd886a9d9006184f4d9d5c5cf43e71b2e 100644 (file)
@@ -537,21 +537,6 @@ more details, with real examples.
        The third parameter may be a text as in this example, but it may also
        be an expanded variable or a macro.
 
-    cc-fullversion
-       cc-fullversion is useful when the exact version of gcc is needed.
-       One typical use-case is when a specific GCC version is broken.
-       cc-fullversion points out a more specific version than cc-version does.
-
-       Example:
-               #arch/powerpc/Makefile
-               $(Q)if test "$(cc-fullversion)" = "040200" ; then \
-                       echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \
-                       false ; \
-               fi
-
-       In this example for a specific GCC version the build will error out
-       explaining to the user why it stops.
-
     cc-cross-prefix
        cc-cross-prefix is used to check if there exists a $(CC) in path with
        one of the listed prefixes. The first prefix where there exist a
diff --git a/Documentation/laptops/lg-laptop.rst b/Documentation/laptops/lg-laptop.rst
new file mode 100644 (file)
index 0000000..e486fe7
--- /dev/null
@@ -0,0 +1,81 @@
+.. SPDX-License-Identifier: GPL-2.0+
+LG Gram laptop extra features
+=============================
+
+By Matan Ziv-Av <matan@svgalib.org>
+
+
+Hotkeys
+-------
+
+The following FN keys are ignored by the kernel without this driver:
+- FN-F1 (LG control panel)   - Generates F15
+- FN-F5 (Touchpad toggle)    - Generates F13
+- FN-F6 (Airplane mode)      - Generates RFKILL
+- FN-F8 (Keyboard backlight) - Generates F16.
+  This key also changes keyboard backlight mode.
+- FN-F9 (Reader mode)        - Generates F14
+
+The rest of the FN keys work without a need for a special driver.
+
+
+Reader mode
+-----------
+
+Writing 0/1 to /sys/devices/platform/lg-laptop/reader_mode disables/enables
+reader mode. In this mode the screen colors change (blue color reduced),
+and the reader mode indicator LED (on F9 key) turns on.
+
+
+FN Lock
+-------
+
+Writing 0/1 to /sys/devices/platform/lg-laptop/fn_lock disables/enables
+FN lock.
+
+
+Battery care limit
+------------------
+
+Writing 80/100 to /sys/devices/platform/lg-laptop/battery_care_limit
+sets the maximum capacity to charge the battery. Limiting the charge
+reduces battery capacity loss over time.
+
+This value is reset to 100 when the kernel boots.
+
+
+Fan mode
+--------
+
+Writing 1/0 to /sys/devices/platform/lg-laptop/fan_mode disables/enables
+the fan silent mode.
+
+
+USB charge
+----------
+
+Writing 0/1 to /sys/devices/platform/lg-laptop/usb_charge disables/enables
+charging another device from the USB port while the device is turned off.
+
+This value is reset to 0 when the kernel boots.
+
+
+LEDs
+~~~~
+
+There are two LED devices supported by the driver:
+
+Keyboard backlight
+------------------
+
+An LED device named kbd_led controls the keyboard backlight. There are three
+lighting levels: off (0), low (127) and high (255).
+
+The keyboard backlight is also controlled by the key combination FN-F8
+which cycles through those levels.
+
+
+Touchpad indicator LED
+----------------------
+
+On the F5 key. Controlled by an LED device named tpad_led.
index 1e4948c9e9897afb3ac0231a2c1c76ea310968ee..4d118b827bbb7ed1f9e7c221ae1c10ceee7bb779 100644 (file)
@@ -20,7 +20,7 @@ Enabling the driver
 The driver is enabled via the standard kernel configuration system,
 using the make command::
 
-  make oldconfig/silentoldconfig/menuconfig/etc.
+  make oldconfig/menuconfig/etc.
 
 The driver is located in the menu structure at:
 
index 163b5ff1073cd0a852d9ed32e06c599cefcfdd75..32b21571adfeb5bc4b5aec9b25ec14ecebc8e0e5 100644 (file)
@@ -316,6 +316,17 @@ tcp_frto - INTEGER
 
        By default it's enabled with a non-zero value. 0 disables F-RTO.
 
+tcp_fwmark_accept - BOOLEAN
+       If set, incoming connections to listening sockets that do not have a
+       socket mark will set the mark of the accepting socket to the fwmark of
+       the incoming SYN packet. This will cause all packets on that connection
+       (starting from the first SYNACK) to be sent with that fwmark. The
+       listening socket's mark is unchanged. Listening sockets that already
+       have a fwmark set via setsockopt(SOL_SOCKET, SO_MARK, ...) are
+       unaffected.
+
+       Default: 0
+
 tcp_invalid_ratelimit - INTEGER
        Limit the maximal rate for sending duplicate acknowledgments
        in response to incoming TCP packets that are for an existing
index 757808526d9a8bbb2197dc54a532ea766e5c4be3..878ebfda7eeff378a2fee48e3b361aa6b3587896 100644 (file)
@@ -25,6 +25,7 @@ Below are the essential guides that every developer should read.
    code-of-conduct-interpretation
    development-process
    submitting-patches
+   programming-language
    coding-style
    maintainer-pgp-guide
    email-clients
diff --git a/Documentation/process/programming-language.rst b/Documentation/process/programming-language.rst
new file mode 100644 (file)
index 0000000..e5f5f06
--- /dev/null
@@ -0,0 +1,45 @@
+.. _programming_language:
+
+Programming Language
+====================
+
+The kernel is written in the C programming language [c-language]_.
+More precisely, the kernel is typically compiled with ``gcc`` [gcc]_
+under ``-std=gnu89`` [gcc-c-dialect-options]_: the GNU dialect of ISO C90
+(including some C99 features).
+
+This dialect contains many extensions to the language [gnu-extensions]_,
+and many of them are used within the kernel as a matter of course.
+
+There is some support for compiling the kernel with ``clang`` [clang]_
+and ``icc`` [icc]_ for several of the architectures, although at the time
+of writing it is not completed, requiring third-party patches.
+
+Attributes
+----------
+
+One of the common extensions used throughout the kernel is attributes
+[gcc-attribute-syntax]_. Attributes make it possible to introduce
+implementation-defined semantics to language entities (like variables,
+functions or types) without having to make significant syntactic changes
+to the language (e.g. adding a new keyword) [n2049]_.
+
+In some cases, attributes are optional (i.e. a compiler not supporting them
+should still produce proper code, even if it is slower or does not perform
+as many compile-time checks/diagnostics).
+
+The kernel defines pseudo-keywords (e.g. ``__pure``) instead of using
+directly the GNU attribute syntax (e.g. ``__attribute__((__pure__))``)
+in order to feature detect which ones can be used and/or to shorten the code.
+
+Please refer to ``include/linux/compiler_attributes.h`` for more information.
+
+.. [c-language] http://www.open-std.org/jtc1/sc22/wg14/www/standards
+.. [gcc] https://gcc.gnu.org
+.. [clang] https://clang.llvm.org
+.. [icc] https://software.intel.com/en-us/c-compilers
+.. [gcc-c-dialect-options] https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html
+.. [gnu-extensions] https://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html
+.. [gcc-attribute-syntax] https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html
+.. [n2049] http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2049.pdf
+
index 9ce7256c6edba8b605e9928a42159d717f6d7cf5..9521c4207f014d11f4edd33bcefdc788d0299d6a 100644 (file)
@@ -859,6 +859,7 @@ The keyctl syscall functions are:
      and either the buffer length or the OtherInfo length exceeds the
      allowed length.
 
+
   *  Restrict keyring linkage::
 
        long keyctl(KEYCTL_RESTRICT_KEYRING, key_serial_t keyring,
@@ -890,6 +891,116 @@ The keyctl syscall functions are:
      applicable to the asymmetric key type.
 
 
+  *  Query an asymmetric key::
+
+       long keyctl(KEYCTL_PKEY_QUERY,
+                   key_serial_t key_id, unsigned long reserved,
+                   struct keyctl_pkey_query *info);
+
+     Get information about an asymmetric key.  The information is returned in
+     the keyctl_pkey_query struct::
+
+       __u32   supported_ops;
+       __u32   key_size;
+       __u16   max_data_size;
+       __u16   max_sig_size;
+       __u16   max_enc_size;
+       __u16   max_dec_size;
+       __u32   __spare[10];
+
+     ``supported_ops`` contains a bit mask of flags indicating which ops are
+     supported.  This is constructed from a bitwise-OR of::
+
+       KEYCTL_SUPPORTS_{ENCRYPT,DECRYPT,SIGN,VERIFY}
+
+     ``key_size`` indicates the size of the key in bits.
+
+     ``max_*_size`` indicate the maximum sizes in bytes of a blob of data to be
+     signed, a signature blob, a blob to be encrypted and a blob to be
+     decrypted.
+
+     ``__spare[]`` must be set to 0.  This is intended for future use to hand
+     over one or more passphrases needed to unlock a key.
+
+     If successful, 0 is returned.  If the key is not an asymmetric key,
+     EOPNOTSUPP is returned.
+
+
+  *  Encrypt, decrypt, sign or verify a blob using an asymmetric key::
+
+       long keyctl(KEYCTL_PKEY_ENCRYPT,
+                   const struct keyctl_pkey_params *params,
+                   const char *info,
+                   const void *in,
+                   void *out);
+
+       long keyctl(KEYCTL_PKEY_DECRYPT,
+                   const struct keyctl_pkey_params *params,
+                   const char *info,
+                   const void *in,
+                   void *out);
+
+       long keyctl(KEYCTL_PKEY_SIGN,
+                   const struct keyctl_pkey_params *params,
+                   const char *info,
+                   const void *in,
+                   void *out);
+
+       long keyctl(KEYCTL_PKEY_VERIFY,
+                   const struct keyctl_pkey_params *params,
+                   const char *info,
+                   const void *in,
+                   const void *in2);
+
+     Use an asymmetric key to perform a public-key cryptographic operation on a
+     blob of data.  For encryption and verification, the asymmetric key may
+     only need the public parts to be available, but for decryption and signing
+     the private parts are required also.
+
+     The parameter block pointed to by params contains a number of integer
+     values::
+
+       __s32           key_id;
+       __u32           in_len;
+       __u32           out_len;
+       __u32           in2_len;
+
+     ``key_id`` is the ID of the asymmetric key to be used.  ``in_len`` and
+     ``in2_len`` indicate the amount of data in the in and in2 buffers and
+     ``out_len`` indicates the size of the out buffer as appropriate for the
+     above operations.
+
+     For a given operation, the in and out buffers are used as follows::
+
+       Operation ID            in,in_len       out,out_len     in2,in2_len
+       ======================= =============== =============== ===============
+       KEYCTL_PKEY_ENCRYPT     Raw data        Encrypted data  -
+       KEYCTL_PKEY_DECRYPT     Encrypted data  Raw data        -
+       KEYCTL_PKEY_SIGN        Raw data        Signature       -
+       KEYCTL_PKEY_VERIFY      Raw data        -               Signature
+
+     ``info`` is a string of key=value pairs that supply supplementary
+     information.  These include:
+
+       ``enc=<encoding>`` The encoding of the encrypted/signature blob.  This
+                       can be "pkcs1" for RSASSA-PKCS1-v1.5 or
+                       RSAES-PKCS1-v1.5; "pss" for "RSASSA-PSS"; "oaep" for
+                       "RSAES-OAEP".  If omitted or is "raw", the raw output
+                       of the encryption function is specified.
+
+       ``hash=<algo>`` If the data buffer contains the output of a hash
+                       function and the encoding includes some indication of
+                       which hash function was used, the hash function can be
+                       specified with this, eg. "hash=sha256".
+
+     The ``__spare[]`` space in the parameter block must be set to 0.  This is
+     intended, amongst other things, to allow the passing of passphrases
+     required to unlock a key.
+
+     If successful, encrypt, decrypt and sign all return the amount of data
+     written into the output buffer.  Verification returns 0 on success.
+
+
 Kernel Services
 ===============
 
@@ -1483,6 +1594,112 @@ The structure has a number of fields, some of which are mandatory:
      attempted key link operation. If there is no match, -EINVAL is returned.
 
 
+  *  ``int (*asym_eds_op)(struct kernel_pkey_params *params,
+                         const void *in, void *out);``
+     ``int (*asym_verify_signature)(struct kernel_pkey_params *params,
+                                   const void *in, const void *in2);``
+
+     These methods are optional.  If provided the first allows a key to be
+     used to encrypt, decrypt or sign a blob of data, and the second allows a
+     key to verify a signature.
+
+     In all cases, the following information is provided in the params block::
+
+       struct kernel_pkey_params {
+               struct key      *key;
+               const char      *encoding;
+               const char      *hash_algo;
+               char            *info;
+               __u32           in_len;
+               union {
+                       __u32   out_len;
+                       __u32   in2_len;
+               };
+               enum kernel_pkey_operation op : 8;
+       };
+
+     This includes the key to be used; a string indicating the encoding to use
+     (for instance, "pkcs1" may be used with an RSA key to indicate
+     RSASSA-PKCS1-v1.5 or RSAES-PKCS1-v1.5 encoding or "raw" if no encoding);
+     the name of the hash algorithm used to generate the data for a signature
+     (if appropriate); the sizes of the input and output (or second input)
+     buffers; and the ID of the operation to be performed.
+
+     For a given operation ID, the input and output buffers are used as
+     follows::
+
+       Operation ID            in,in_len       out,out_len     in2,in2_len
+       ======================= =============== =============== ===============
+       kernel_pkey_encrypt     Raw data        Encrypted data  -
+       kernel_pkey_decrypt     Encrypted data  Raw data        -
+       kernel_pkey_sign        Raw data        Signature       -
+       kernel_pkey_verify      Raw data        -               Signature
+
+     asym_eds_op() deals with encryption, decryption and signature creation as
+     specified by params->op.  Note that params->op is also set for
+     asym_verify_signature().
+
+     Encrypting and signature creation both take raw data in the input buffer
+     and return the encrypted result in the output buffer.  Padding may have
+     been added if an encoding was set.  In the case of signature creation,
+     depending on the encoding, the padding created may need to indicate the
+     digest algorithm - the name of which should be supplied in hash_algo.
+
+     Decryption takes encrypted data in the input buffer and returns the raw
+     data in the output buffer.  Padding will get checked and stripped off if
+     an encoding was set.
+
+     Verification takes raw data in the input buffer and the signature in the
+     second input buffer and checks that the one matches the other.  Padding
+     will be validated.  Depending on the encoding, the digest algorithm used
+     to generate the raw data may need to be indicated in hash_algo.
+
+     If successful, asym_eds_op() should return the number of bytes written
+     into the output buffer.  asym_verify_signature() should return 0.
+
+     A variety of errors may be returned, including EOPNOTSUPP if the operation
+     is not supported; EKEYREJECTED if verification fails; ENOPKG if the
+     required crypto isn't available.
+
+
+  *  ``int (*asym_query)(const struct kernel_pkey_params *params,
+                        struct kernel_pkey_query *info);``
+
+     This method is optional.  If provided it allows information about the
+     public or asymmetric key held in the key to be determined.
+
+     The parameter block is as for asym_eds_op() and co. but in_len and out_len
+     are unused.  The encoding and hash_algo fields should be used to reduce
+     the returned buffer/data sizes as appropriate.
+
+     If successful, the following information is filled in::
+
+       struct kernel_pkey_query {
+               __u32           supported_ops;
+               __u32           key_size;
+               __u16           max_data_size;
+               __u16           max_sig_size;
+               __u16           max_enc_size;
+               __u16           max_dec_size;
+       };
+
+     The supported_ops field will contain a bitmask indicating what operations
+     are supported by the key, including encryption of a blob, decryption of a
+     blob, signing a blob and verifying the signature on a blob.  The following
+     constants are defined for this::
+
+       KEYCTL_SUPPORTS_{ENCRYPT,DECRYPT,SIGN,VERIFY}
+
+     The key_size field is the size of the key in bits.  max_data_size and
+     max_sig_size are the maximum raw data and signature sizes for creation and
+     verification of a signature; max_enc_size and max_dec_size are the maximum
+     raw data and signature sizes for encryption and decryption.  The
+     max_*_size fields are measured in bytes.
+
+     If successful, 0 will be returned.  If the key doesn't support this,
+     EOPNOTSUPP will be returned.
+
+
 Request-Key Callback Service
 ============================
 
index e1ca698e000639720e9c718ec28613177cad189e..f584fb74b4ff2852eead1a46df316f14d3aeef69 100644 (file)
@@ -302,11 +302,11 @@ sure structure holes are cleared.
 Memory poisoning
 ----------------
 
-When releasing memory, it is best to poison the contents (clear stack on
-syscall return, wipe heap memory on a free), to avoid reuse attacks that
-rely on the old contents of memory. This frustrates many uninitialized
-variable attacks, stack content exposures, heap content exposures, and
-use-after-free attacks.
+When releasing memory, it is best to poison the contents, to avoid reuse
+attacks that rely on the old contents of memory. E.g., clear stack on a
+syscall return (``CONFIG_GCC_PLUGIN_STACKLEAK``), wipe heap memory on a
+free. This frustrates many uninitialized variable attacks, stack content
+exposures, heap content exposures, and use-after-free attacks.
 
 Destination tracking
 --------------------
index 37a679501ddc68bc0ab26c58444794c0d30c8f40..1b8775298cf7a0223c04aa7098cf1d1e4d24fefd 100644 (file)
@@ -89,6 +89,7 @@ show up in /proc/sys/kernel:
 - shmmni
 - softlockup_all_cpu_backtrace
 - soft_watchdog
+- stack_erasing
 - stop-a                      [ SPARC only ]
 - sysrq                       ==> Documentation/admin-guide/sysrq.rst
 - sysctl_writes_strict
@@ -987,6 +988,23 @@ detect a hard lockup condition.
 
 ==============================================================
 
+stack_erasing
+
+This parameter can be used to control kernel stack erasing at the end
+of syscalls for kernels built with CONFIG_GCC_PLUGIN_STACKLEAK.
+
+That erasing reduces the information which kernel stack leak bugs
+can reveal and blocks some uninitialized stack variable attacks.
+The tradeoff is the performance impact: on a single CPU system kernel
+compilation sees a 1% slowdown, other systems and workloads may vary.
+
+  0: kernel stack erasing is disabled, STACKLEAK_METRICS are not updated.
+
+  1: kernel stack erasing is enabled (default), it is performed before
+     returning to the userspace at the end of syscalls.
+
+==============================================================
+
 tainted:
 
 Non-zero if the kernel has been tainted. Numeric values, which can be
index 702898633b0007a1e50670fd05c7c24d58123c6c..73aaaa3da4369e39b41bf360b24b820061a15df9 100644 (file)
@@ -146,3 +146,6 @@ Their order is preserved but their base will be offset early at boot time.
 Be very careful vs. KASLR when changing anything here. The KASLR address
 range must not overlap with anything except the KASAN shadow area, which is
 correct as KASAN disables KASLR.
+
+For both 4- and 5-level layouts, the STACKLEAK_POISON value in the last 2MB
+hole: ffffffffffff4111
index 1c0f771b859ec24b6123efc7a511525613d86199..0abecc528daca1cdaa564b36974c89b70229481f 100644 (file)
@@ -376,7 +376,7 @@ F:  drivers/platform/x86/i2c-multi-instantiate.c
 ACPI PMIC DRIVERS
 M:     "Rafael J. Wysocki" <rjw@rjwysocki.net>
 M:     Len Brown <lenb@kernel.org>
-R:     Andy Shevchenko <andy@infradead.org>
+R:     Andy Shevchenko <andriy.shevchenko@linux.intel.com>
 R:     Mika Westerberg <mika.westerberg@linux.intel.com>
 L:     linux-acpi@vger.kernel.org
 Q:     https://patchwork.kernel.org/project/linux-acpi/list/
@@ -3737,6 +3737,11 @@ L:       platform-driver-x86@vger.kernel.org
 S:     Maintained
 F:     drivers/platform/x86/compal-laptop.c
 
+COMPILER ATTRIBUTES
+M:     Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
+S:     Maintained
+F:     include/linux/compiler_attributes.h
+
 CONEXANT ACCESSRUNNER USB DRIVER
 L:     accessrunner-general@lists.sourceforge.net
 W:     http://accessrunner.sourceforge.net/
@@ -4207,6 +4212,12 @@ M:       Pali Rohár <pali.rohar@gmail.com>
 S:     Maintained
 F:     drivers/platform/x86/dell-rbtn.*
 
+DELL REMOTE BIOS UPDATE DRIVER
+M:     Stuart Hayes <stuart.w.hayes@gmail.com>
+L:     platform-driver-x86@vger.kernel.org
+S:     Maintained
+F:     drivers/platform/x86/dell_rbu.c
+
 DELL LAPTOP SMM DRIVER
 M:     Pali Rohár <pali.rohar@gmail.com>
 S:     Maintained
@@ -4214,10 +4225,11 @@ F:      drivers/hwmon/dell-smm-hwmon.c
 F:     include/uapi/linux/i8k.h
 
 DELL SYSTEMS MANAGEMENT BASE DRIVER (dcdbas)
-M:     Doug Warzecha <Douglas_Warzecha@dell.com>
+M:     Stuart Hayes <stuart.w.hayes@gmail.com>
+L:     platform-driver-x86@vger.kernel.org
 S:     Maintained
 F:     Documentation/dcdbas.txt
-F:     drivers/firmware/dcdbas.*
+F:     drivers/platform/x86/dcdbas.*
 
 DELL WMI NOTIFICATIONS DRIVER
 M:     Matthew Garrett <mjg59@srcf.ucam.org>
@@ -5871,6 +5883,14 @@ L:       linux-i2c@vger.kernel.org
 S:     Maintained
 F:     drivers/i2c/busses/i2c-cpm.c
 
+FREESCALE IMX LPI2C DRIVER
+M:     Dong Aisheng <aisheng.dong@nxp.com>
+L:     linux-i2c@vger.kernel.org
+L:     linux-imx@nxp.com
+S:     Maintained
+F:     drivers/i2c/busses/i2c-imx-lpi2c.c
+F:     Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt
+
 FREESCALE IMX / MXC FEC DRIVER
 M:     Fugang Duan <fugang.duan@nxp.com>
 L:     netdev@vger.kernel.org
@@ -6587,9 +6607,9 @@ F:        arch/*/include/asm/suspend*.h
 
 HID CORE LAYER
 M:     Jiri Kosina <jikos@kernel.org>
-R:     Benjamin Tissoires <benjamin.tissoires@redhat.com>
+M:     Benjamin Tissoires <benjamin.tissoires@redhat.com>
 L:     linux-input@vger.kernel.org
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git
 S:     Maintained
 F:     drivers/hid/
 F:     include/linux/hid*
@@ -6841,6 +6861,13 @@ L:       linux-acpi@vger.kernel.org
 S:     Maintained
 F:     drivers/i2c/i2c-core-acpi.c
 
+I2C CONTROLLER DRIVER FOR NVIDIA GPU
+M:     Ajay Gupta <ajayg@nvidia.com>
+L:     linux-i2c@vger.kernel.org
+S:     Maintained
+F:     Documentation/i2c/busses/i2c-nvidia-gpu
+F:     drivers/i2c/busses/i2c-nvidia-gpu.c
+
 I2C MUXES
 M:     Peter Rosin <peda@axentia.se>
 L:     linux-i2c@vger.kernel.org
@@ -7347,6 +7374,12 @@ L:       alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Supported
 F:     sound/soc/intel/
 
+INTEL ATOMISP2 DUMMY / POWER-MANAGEMENT DRIVER
+M:     Hans de Goede <hdegoede@redhat.com>
+L:     platform-driver-x86@vger.kernel.org
+S:     Maintained
+F:     drivers/platform/x86/intel_atomisp2_pm.c
+
 INTEL C600 SERIES SAS CONTROLLER DRIVER
 M:     Intel SCU Linux support <intel-linux-scu@intel.com>
 M:     Artur Paszkiewicz <artur.paszkiewicz@intel.com>
@@ -7533,7 +7566,6 @@ M:        Rajneesh Bhardwaj <rajneesh.bhardwaj@intel.com>
 M:     Vishwanath Somayaji <vishwanath.somayaji@intel.com>
 L:     platform-driver-x86@vger.kernel.org
 S:     Maintained
-F:     arch/x86/include/asm/pmc_core.h
 F:     drivers/platform/x86/intel_pmc_core*
 
 INTEL PMC/P-Unit IPC DRIVER
@@ -7577,7 +7609,8 @@ F:        drivers/infiniband/hw/i40iw/
 F:     include/uapi/rdma/i40iw-abi.h
 
 INTEL TELEMETRY DRIVER
-M:     Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>
+M:     Rajneesh Bhardwaj <rajneesh.bhardwaj@linux.intel.com>
+M:     "David E. Box" <david.e.box@linux.intel.com>
 L:     platform-driver-x86@vger.kernel.org
 S:     Maintained
 F:     arch/x86/include/asm/intel_telemetry.h
@@ -8310,6 +8343,14 @@ W:       http://legousb.sourceforge.net/
 S:     Maintained
 F:     drivers/usb/misc/legousbtower.c
 
+LG LAPTOP EXTRAS
+M:     Matan Ziv-Av <matan@svgalib.org>
+L:     platform-driver-x86@vger.kernel.org
+S:     Maintained
+F:     Documentation/ABI/testing/sysfs-platform-lg-laptop
+F:     Documentation/laptops/lg-laptop.rst
+F:     drivers/platform/x86/lg-laptop.c
+
 LG2160 MEDIA DRIVER
 M:     Michael Krufky <mkrufky@linuxtv.org>
 L:     linux-media@vger.kernel.org
@@ -8333,7 +8374,7 @@ F:        drivers/media/dvb-frontends/lgdt3305.*
 LIBATA PATA ARASAN COMPACT FLASH CONTROLLER
 M:     Viresh Kumar <vireshk@kernel.org>
 L:     linux-ide@vger.kernel.org
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
 S:     Maintained
 F:     include/linux/pata_arasan_cf_data.h
 F:     drivers/ata/pata_arasan_cf.c
@@ -8350,7 +8391,7 @@ F:        drivers/ata/ata_generic.c
 LIBATA PATA FARADAY FTIDE010 AND GEMINI SATA BRIDGE DRIVERS
 M:     Linus Walleij <linus.walleij@linaro.org>
 L:     linux-ide@vger.kernel.org
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
 S:     Maintained
 F:     drivers/ata/pata_ftide010.c
 F:     drivers/ata/sata_gemini.c
@@ -8369,7 +8410,7 @@ F:        include/linux/ahci_platform.h
 LIBATA SATA PROMISE TX2/TX4 CONTROLLER DRIVER
 M:     Mikael Pettersson <mikpelinux@gmail.com>
 L:     linux-ide@vger.kernel.org
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
 S:     Maintained
 F:     drivers/ata/sata_promise.*
 
@@ -10750,6 +10791,14 @@ L:     linux-omap@vger.kernel.org
 S:     Maintained
 F:     arch/arm/mach-omap2/omap_hwmod.*
 
+OMAP I2C DRIVER
+M:     Vignesh R <vigneshr@ti.com>
+L:     linux-omap@vger.kernel.org
+L:     linux-i2c@vger.kernel.org
+S:     Maintained
+F:     Documentation/devicetree/bindings/i2c/i2c-omap.txt
+F:     drivers/i2c/busses/i2c-omap.c
+
 OMAP IMAGING SUBSYSTEM (OMAP3 ISP and OMAP4 ISS)
 M:     Laurent Pinchart <laurent.pinchart@ideasonboard.com>
 L:     linux-media@vger.kernel.org
@@ -15402,9 +15451,9 @@ F:      include/linux/usb/gadget*
 
 USB HID/HIDBP DRIVERS (USB KEYBOARDS, MICE, REMOTE CONTROLS, ...)
 M:     Jiri Kosina <jikos@kernel.org>
-R:     Benjamin Tissoires <benjamin.tissoires@redhat.com>
+M:     Benjamin Tissoires <benjamin.tissoires@redhat.com>
 L:     linux-usb@vger.kernel.org
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git
 S:     Maintained
 F:     Documentation/hid/hiddev.txt
 F:     drivers/hid/usbhid/
@@ -15829,7 +15878,6 @@ F:      net/vmw_vsock/virtio_transport_common.c
 F:     net/vmw_vsock/virtio_transport.c
 F:     drivers/net/vsockmon.c
 F:     drivers/vhost/vsock.c
-F:     drivers/vhost/vsock.h
 F:     tools/testing/vsock/
 
 VIRTIO CONSOLE DRIVER
index 9aa352b38815801e37fd3cb04c66fa96511443f5..9fce8b91c15f6055f534eb18432e706a5ac3b09c 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 4
-PATCHLEVEL = 19
+PATCHLEVEL = 20
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
 NAME = "People's Front"
 
 # *DOCUMENTATION*
@@ -485,7 +485,7 @@ ifneq ($(KBUILD_SRC),)
        $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkmakefile $(srctree)
 endif
 
-ifeq ($(cc-name),clang)
+ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
 ifneq ($(CROSS_COMPILE),)
 CLANG_TARGET   := --target=$(notdir $(CROSS_COMPILE:%-=%))
 GCC_TOOLCHAIN_DIR := $(dir $(shell which $(LD)))
@@ -702,7 +702,7 @@ stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG)      := -fstack-protector-strong
 
 KBUILD_CFLAGS += $(stackp-flags-y)
 
-ifeq ($(cc-name),clang)
+ifdef CONFIG_CC_IS_CLANG
 KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
 KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
 KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
index ed27fd26262764fc44092d639030aa7b19f53ea8..e1e540ffa9793d5279c68d9bca412e8a3ef115ae 100644 (file)
@@ -429,6 +429,13 @@ config SECCOMP_FILTER
 
          See Documentation/userspace-api/seccomp_filter.rst for details.
 
+config HAVE_ARCH_STACKLEAK
+       bool
+       help
+         An architecture should select this if it has the code which
+         fills the used part of the kernel stack with the STACKLEAK_POISON
+         value before returning from system calls.
+
 config HAVE_STACKPROTECTOR
        bool
        help
index 6a8c53dec57e6e3aa22a5be371b922ebb1bd154d..b7c77bb1bfd20368a8ff95a93d5493353e58023a 100644 (file)
 })
 
 #define user_termios_to_kernel_termios(k, u) \
-       copy_from_user(k, u, sizeof(struct termios))
+       copy_from_user(k, u, sizeof(struct termios2))
 
 #define kernel_termios_to_user_termios(u, k) \
+       copy_to_user(u, k, sizeof(struct termios2))
+
+#define user_termios_to_kernel_termios_1(k, u) \
+       copy_from_user(k, u, sizeof(struct termios))
+
+#define kernel_termios_to_user_termios_1(u, k) \
        copy_to_user(u, k, sizeof(struct termios))
 
 #endif /* _ALPHA_TERMIOS_H */
index 1e9121c9b3c74c16d129ce6fac97f614080dca94..971311605288faea94b19d23d0b346361a11a6a9 100644 (file)
 #define TCXONC         _IO('t', 30)
 #define TCFLSH         _IO('t', 31)
 
+#define TCGETS2                _IOR('T', 42, struct termios2)
+#define TCSETS2                _IOW('T', 43, struct termios2)
+#define TCSETSW2       _IOW('T', 44, struct termios2)
+#define TCSETSF2       _IOW('T', 45, struct termios2)
+
 #define TIOCSWINSZ     _IOW('t', 103, struct winsize)
 #define TIOCGWINSZ     _IOR('t', 104, struct winsize)
 #define        TIOCSTART       _IO('t', 110)           /* start output, like ^Q */
index de6c8360fbe3657e3ddf7cd6bb648a3d8b0fdb71..4575ba34a0eaeecb9b17cb9f3b6b18a698bafdfb 100644 (file)
@@ -26,6 +26,19 @@ struct termios {
        speed_t c_ospeed;               /* output speed */
 };
 
+/* Alpha has identical termios and termios2 */
+
+struct termios2 {
+       tcflag_t c_iflag;               /* input mode flags */
+       tcflag_t c_oflag;               /* output mode flags */
+       tcflag_t c_cflag;               /* control mode flags */
+       tcflag_t c_lflag;               /* local mode flags */
+       cc_t c_cc[NCCS];                /* control characters */
+       cc_t c_line;                    /* line discipline (== c_cc[19]) */
+       speed_t c_ispeed;               /* input speed */
+       speed_t c_ospeed;               /* output speed */
+};
+
 /* Alpha has matching termios and ktermios */
 
 struct ktermios {
@@ -152,6 +165,7 @@ struct ktermios {
 #define B3000000  00034
 #define B3500000  00035
 #define B4000000  00036
+#define BOTHER    00037
 
 #define CSIZE  00001400
 #define   CS5  00000000
@@ -169,6 +183,9 @@ struct ktermios {
 #define CMSPAR   010000000000          /* mark or space (stick) parity */
 #define CRTSCTS          020000000000          /* flow control */
 
+#define CIBAUD 07600000
+#define IBSHIFT        16
+
 /* c_lflag bits */
 #define ISIG   0x00000080
 #define ICANON 0x00000100
index b560ff88459bf1b74a0c093f3bdc15d2118c0182..5ff9a179c83c3326ab2dec5fdceee155021bf716 100644 (file)
@@ -55,7 +55,7 @@
        };
 
        chosen {
-               stdout-path = "&uart1:115200n8";
+               stdout-path = "serial0:115200n8";
        };
 
        memory@70000000 {
index ed9a980bce8501fcca0c3d357a8440cb8debd59d..beefa1b2049d7b56476a62429c2b4e4abf65ad33 100644 (file)
                        i2c1: i2c@21a0000 {
                                #address-cells = <1>;
                                #size-cells = <0>;
-                               compatible = "fs,imx6sll-i2c", "fsl,imx21-i2c";
+                               compatible = "fsl,imx6sll-i2c", "fsl,imx21-i2c";
                                reg = <0x021a0000 0x4000>;
                                interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>;
                                clocks = <&clks IMX6SLL_CLK_I2C1>;
index 53b3408b5fab1845248b2b7ff078eba462f730ab..7d7d679945d28efe4f827e1b6197001088826b7f 100644 (file)
                regulator-name = "enet_3v3";
                regulator-min-microvolt = <3300000>;
                regulator-max-microvolt = <3300000>;
-               gpios = <&gpio2 6 GPIO_ACTIVE_LOW>;
+               gpio = <&gpio2 6 GPIO_ACTIVE_LOW>;
+               regulator-boot-on;
+               regulator-always-on;
        };
 
        reg_pcie_gpio: regulator-pcie-gpio {
        phy-supply = <&reg_enet_3v3>;
        phy-mode = "rgmii";
        phy-handle = <&ethphy1>;
+       phy-reset-gpios = <&gpio2 7 GPIO_ACTIVE_LOW>;
        status = "okay";
 
        mdio {
                                MX6SX_PAD_RGMII1_RD3__ENET1_RX_DATA_3   0x3081
                                MX6SX_PAD_RGMII1_RX_CTL__ENET1_RX_EN    0x3081
                                MX6SX_PAD_ENET2_RX_CLK__ENET2_REF_CLK_25M       0x91
+                               /* phy reset */
+                               MX6SX_PAD_ENET2_CRS__GPIO2_IO_7         0x10b0
                        >;
                };
 
index c50c36baba758f4364aac78e7f973c2c0b1a65b0..8bf1c17f8cefb6b368c40c4376a7ad918c8fde10 100644 (file)
                        interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&rcc HASH1>;
                        resets = <&rcc HASH1_R>;
-                       dmas = <&mdma1 31 0x10 0x1000A02 0x0 0x0 0x0>;
+                       dmas = <&mdma1 31 0x10 0x1000A02 0x0 0x0>;
                        dma-names = "in";
                        dma-maxburst = <2>;
                        status = "disabled";
index 41ec66a969907d492dabec284e6a101f592e0216..ca62495587602f44d3e514fb2df910edfc584ea1 100644 (file)
@@ -50,8 +50,8 @@
        compatible = "fsl,vf610m4";
 
        chosen {
-               bootargs = "console=ttyLP2,115200 clk_ignore_unused init=/linuxrc rw";
-               stdout-path = "&uart2";
+               bootargs = "clk_ignore_unused init=/linuxrc rw";
+               stdout-path = "serial2:115200";
        };
 
        memory@8c000000 {
index 92fd2c8a9af0638834d6c2b5814b9a88911f33fe..12659ce5c1f38e2f166937b18957c4fbf5732c3d 100644 (file)
@@ -10,7 +10,7 @@
 #ifndef _ASM_PGTABLE_2LEVEL_H
 #define _ASM_PGTABLE_2LEVEL_H
 
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 
 /*
  * Hardware-wise, we have a two level page table structure, where the first
index af318d958fd2a7c9796ad2384c8c122ca04358e2..3d191fd52910f154d08b97432efc2f1701e58503 100644 (file)
@@ -773,7 +773,7 @@ static struct plat_serial8250_port ams_delta_modem_ports[] = {
        {
                .membase        = IOMEM(MODEM_VIRT),
                .mapbase        = MODEM_PHYS,
-               .irq            = -EINVAL, /* changed later */
+               .irq            = IRQ_NOTCONNECTED, /* changed later */
                .flags          = UPF_BOOT_AUTOCONF,
                .irqflags       = IRQF_TRIGGER_RISING,
                .iotype         = UPIO_MEM,
@@ -864,8 +864,7 @@ static int __init modem_nreset_init(void)
 
 
 /*
- * This function expects MODEM IRQ number already assigned to the port
- * and fails if it's not.
+ * This function expects MODEM IRQ number already assigned to the port.
  * The MODEM device requires its RESET# pin kept high during probe.
  * That requirement can be fulfilled in several ways:
  * - with a descriptor of already functional modem_nreset regulator
@@ -888,9 +887,6 @@ static int __init ams_delta_modem_init(void)
        if (!machine_is_ams_delta())
                return -ENODEV;
 
-       if (ams_delta_modem_ports[0].irq < 0)
-               return ams_delta_modem_ports[0].irq;
-
        omap_cfg_reg(M14_1510_GPIO2);
 
        /* Initialize the modem_nreset regulator consumer before use */
index 6fe52819e0148c6f3f04b11c75e278cd0b04a1f9..339eb17c9808e2c04a043485e42e5d29a49de347 100644 (file)
@@ -112,7 +112,7 @@ ENTRY(cpu_v7_hvc_switch_mm)
        hvc     #0
        ldmfd   sp!, {r0 - r3}
        b       cpu_v7_switch_mm
-ENDPROC(cpu_v7_smc_switch_mm)
+ENDPROC(cpu_v7_hvc_switch_mm)
 #endif
 ENTRY(cpu_v7_iciallu_switch_mm)
        mov     r3, #0
index 5b4ff9373c894515ee211a3ce47f745e021a7ed6..8a6880d528b6f1175bd275b7ed0946b6029718bd 100644 (file)
@@ -28,10 +28,15 @@ void __init orion_mpp_conf(unsigned int *mpp_list, unsigned int variant_mask,
                           unsigned int mpp_max, void __iomem *dev_bus)
 {
        unsigned int mpp_nr_regs = (1 + mpp_max/8);
-       u32 mpp_ctrl[mpp_nr_regs];
+       u32 mpp_ctrl[8];
        int i;
 
        printk(KERN_DEBUG "initial MPP regs:");
+       if (mpp_nr_regs > ARRAY_SIZE(mpp_ctrl)) {
+               printk(KERN_ERR "orion_mpp_conf: invalid mpp_max\n");
+               return;
+       }
+
        for (i = 0; i < mpp_nr_regs; i++) {
                mpp_ctrl[i] = readl(mpp_ctrl_addr(i, dev_bus));
                printk(" %08x", mpp_ctrl[i]);
index b4e994cd3a421d2b0ebe988b227b670039ba6bc5..6cb9fc7e9382d7f48f1b9d98f00be6b8d9df1f4a 100644 (file)
@@ -134,6 +134,7 @@ vdso_install:
 archclean:
        $(Q)$(MAKE) $(clean)=$(boot)
 
+ifeq ($(KBUILD_EXTMOD),)
 # We need to generate vdso-offsets.h before compiling certain files in kernel/.
 # In order to do that, we should use the archprepare target, but we can't since
 # asm-offsets.h is included in some files used to generate vdso-offsets.h, and
@@ -143,6 +144,7 @@ archclean:
 prepare: vdso_prepare
 vdso_prepare: prepare0
        $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h
+endif
 
 define archhelp
   echo  '* Image.gz      - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)'
index 8253a1a9e9857112f43c24d85c5c411e653376dd..fef7351e9f677da62cd0c50e8c424a3590dd7b51 100644 (file)
                        clock-names = "stmmaceth";
                        tx-fifo-depth = <16384>;
                        rx-fifo-depth = <16384>;
+                       snps,multicast-filter-bins = <256>;
                        status = "disabled";
                };
 
                        clock-names = "stmmaceth";
                        tx-fifo-depth = <16384>;
                        rx-fifo-depth = <16384>;
+                       snps,multicast-filter-bins = <256>;
                        status = "disabled";
                };
 
                        clock-names = "stmmaceth";
                        tx-fifo-depth = <16384>;
                        rx-fifo-depth = <16384>;
+                       snps,multicast-filter-bins = <256>;
                        status = "disabled";
                };
 
index b5f2273caca4ded1e6bc0cfe3a5e52b97a3fd854..a79c8d369e0b48c4ddb3448bf4d2498acb676504 100644 (file)
                        clock-names = "fck", "brg_int", "scif_clk";
                        dmas = <&dmac1 0x35>, <&dmac1 0x34>,
                               <&dmac2 0x35>, <&dmac2 0x34>;
-                       dma-names = "tx", "rx";
+                       dma-names = "tx", "rx", "tx", "rx";
                        power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
                        resets = <&cpg 518>;
                        status = "disabled";
index fe2e2c051cc93fc0668a3d3e59a3b13432ef4766..5a7012be0d6ad953198c035df5626f2ff8ce0fe9 100644 (file)
@@ -15,7 +15,7 @@
 
        aliases {
                serial0 = &scif0;
-               ethernet0 = &avb;
+               ethernet0 = &gether;
        };
 
        chosen {
        };
 };
 
-&avb {
-       pinctrl-0 = <&avb_pins>;
-       pinctrl-names = "default";
-
-       phy-mode = "rgmii-id";
-       phy-handle = <&phy0>;
-       renesas,no-ether-link;
-       status = "okay";
-
-       phy0: ethernet-phy@0 {
-               rxc-skew-ps = <1500>;
-               reg = <0>;
-               interrupt-parent = <&gpio1>;
-               interrupts = <17 IRQ_TYPE_LEVEL_LOW>;
-       };
-};
-
 &canfd {
        pinctrl-0 = <&canfd0_pins>;
        pinctrl-names = "default";
        clock-frequency = <32768>;
 };
 
+&gether {
+       pinctrl-0 = <&gether_pins>;
+       pinctrl-names = "default";
+
+       phy-mode = "rgmii-id";
+       phy-handle = <&phy0>;
+       renesas,no-ether-link;
+       status = "okay";
+
+       phy0: ethernet-phy@0 {
+               rxc-skew-ps = <1500>;
+               reg = <0>;
+               interrupt-parent = <&gpio4>;
+               interrupts = <23 IRQ_TYPE_LEVEL_LOW>;
+       };
+};
+
 &i2c0 {
        pinctrl-0 = <&i2c0_pins>;
        pinctrl-names = "default";
 };
 
 &pfc {
-       avb_pins: avb {
-               groups = "avb_mdio", "avb_rgmii";
-               function = "avb";
-       };
-
        canfd0_pins: canfd0 {
                groups = "canfd0_data_a";
                function = "canfd0";
        };
 
+       gether_pins: gether {
+               groups = "gether_mdio_a", "gether_rgmii",
+                        "gether_txcrefclk", "gether_txcrefclk_mega";
+               function = "gether";
+       };
+
        i2c0_pins: i2c0 {
                groups = "i2c0";
                function = "i2c0";
index 3cb995606e605badbc58d977201eb10801ec3fdc..c9a57d11330b85eeb5b965ab67e2e5b8b95e3a46 100644 (file)
@@ -308,6 +308,9 @@ CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y
 CONFIG_SERIAL_MVEBU_UART=y
 CONFIG_SERIAL_DEV_BUS=y
 CONFIG_VIRTIO_CONSOLE=y
+CONFIG_IPMI_HANDLER=m
+CONFIG_IPMI_DEVICE_INTERFACE=m
+CONFIG_IPMI_SI=m
 CONFIG_TCG_TPM=y
 CONFIG_TCG_TIS_I2C_INFINEON=y
 CONFIG_I2C_CHARDEV=y
index 9234013e759e56a9ebd5c326cab49bd7c66df323..21a81b59a0ccd5419be92ec6e661a3e05e5820ff 100644 (file)
@@ -96,6 +96,7 @@ static inline unsigned long __percpu_##op(void *ptr,                  \
                : [val] "Ir" (val));                                    \
                break;                                                  \
        default:                                                        \
+               ret = 0;                                                \
                BUILD_BUG();                                            \
        }                                                               \
                                                                        \
@@ -125,6 +126,7 @@ static inline unsigned long __percpu_read(void *ptr, int size)
                ret = READ_ONCE(*(u64 *)ptr);
                break;
        default:
+               ret = 0;
                BUILD_BUG();
        }
 
@@ -194,6 +196,7 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
                : [val] "r" (val));
                break;
        default:
+               ret = 0;
                BUILD_BUG();
        }
 
index 3e2091708b8e51f04b90e8d6b14c586dd54afeab..6b0d4dff50125e49522212cb7e6db1a778da539d 100644 (file)
 #define KERNEL_DS      UL(-1)
 #define USER_DS                (TASK_SIZE_64 - 1)
 
+/*
+ * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is
+ * no point in shifting all network buffers by 2 bytes just to make some IP
+ * header fields appear aligned in memory, potentially sacrificing some DMA
+ * performance on some platforms.
+ */
+#define NET_IP_ALIGN   0
+
 #ifndef __ASSEMBLY__
 #ifdef __KERNEL__
 
index f46d57c31443062c626e6062f4925d6206ee832b..6b5037ed15b288872d7956f58f576ab4c47424f4 100644 (file)
@@ -58,7 +58,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
 /**
  * elfcorehdr_read - read from ELF core header
  * @buf: buffer where the data is placed
- * @csize: number of bytes to read
+ * @count: number of bytes to read
  * @ppos: address in the memory
  *
  * This function reads @count bytes from elf core header which exists
index 9b65132e789a5572917b7577244b008793f6ff79..2a5b338b254240c8af9d552632214df144a1215a 100644 (file)
@@ -23,7 +23,9 @@
 #include <linux/slab.h>
 #include <linux/stop_machine.h>
 #include <linux/sched/debug.h>
+#include <linux/set_memory.h>
 #include <linux/stringify.h>
+#include <linux/vmalloc.h>
 #include <asm/traps.h>
 #include <asm/ptrace.h>
 #include <asm/cacheflush.h>
@@ -42,10 +44,21 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 static void __kprobes
 post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
 
+static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode)
+{
+       void *addrs[1];
+       u32 insns[1];
+
+       addrs[0] = addr;
+       insns[0] = opcode;
+
+       return aarch64_insn_patch_text(addrs, insns, 1);
+}
+
 static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
 {
        /* prepare insn slot */
-       p->ainsn.api.insn[0] = cpu_to_le32(p->opcode);
+       patch_text(p->ainsn.api.insn, p->opcode);
 
        flush_icache_range((uintptr_t) (p->ainsn.api.insn),
                           (uintptr_t) (p->ainsn.api.insn) +
@@ -118,15 +131,15 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
        return 0;
 }
 
-static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode)
+void *alloc_insn_page(void)
 {
-       void *addrs[1];
-       u32 insns[1];
+       void *page;
 
-       addrs[0] = (void *)addr;
-       insns[0] = (u32)opcode;
+       page = vmalloc_exec(PAGE_SIZE);
+       if (page)
+               set_memory_ro((unsigned long)page, 1);
 
-       return aarch64_insn_patch_text(addrs, insns, 1);
+       return page;
 }
 
 /* arm kprobe: install breakpoint in text */
index ce99c58cd1f1d2081355a7f4420072a31b43ca71..d9a4c2d6dd8b8b8031e6b552067690797eed6b6e 100644 (file)
@@ -497,25 +497,3 @@ void arch_setup_new_exec(void)
 {
        current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
 }
-
-#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
-void __used stackleak_check_alloca(unsigned long size)
-{
-       unsigned long stack_left;
-       unsigned long current_sp = current_stack_pointer;
-       struct stack_info info;
-
-       BUG_ON(!on_accessible_stack(current, current_sp, &info));
-
-       stack_left = current_sp - info.low;
-
-       /*
-        * There's a good chance we're almost out of stack space if this
-        * is true. Using panic() over BUG() is more likely to give
-        * reliable debugging output.
-        */
-       if (size >= stack_left)
-               panic("alloca() over the kernel stack boundary\n");
-}
-EXPORT_SYMBOL(stackleak_check_alloca);
-#endif
index 3a703e5d4e3237f9844d09e871ef1eaa62b781cc..a3ac262848451ae49535c37a6997a211b0f5e914 100644 (file)
@@ -160,6 +160,7 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
        __dma_unmap_area(phys_to_virt(paddr), size, dir);
 }
 
+#ifdef CONFIG_IOMMU_DMA
 static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
                                      struct page *page, size_t size)
 {
@@ -188,6 +189,7 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
 
        return ret;
 }
+#endif /* CONFIG_IOMMU_DMA */
 
 static int __init atomic_pool_init(void)
 {
index 9d9582cac6c40cad483d431682a178c67c445b45..9b432d9fcada8dac8e7b1041437387f29785b2af 100644 (file)
@@ -483,8 +483,6 @@ void __init arm64_memblock_init(void)
        high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
 
        dma_contiguous_reserve(arm64_dma_phys_limit);
-
-       memblock_allow_resize();
 }
 
 void __init bootmem_init(void)
index 394b8d554def4c3372425ed5088ee1116ef9898e..d1d6601b385d9214ceadd17ba51057ff4e023177 100644 (file)
@@ -659,6 +659,8 @@ void __init paging_init(void)
 
        memblock_free(__pa_symbol(init_pg_dir),
                      __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir));
+
+       memblock_allow_resize();
 }
 
 /*
index 48cf6ff9df4a36885db042048ce2aa8761c58a58..22a162cd99e8112b37681afd8934c3c3eb490f99 100644 (file)
@@ -1,9 +1 @@
-menu "C-SKY Debug Options"
-config CSKY_BUILTIN_DTB
-       string "Use kernel builtin dtb"
-       help
-         User could define the dtb instead of the one which is passed from
-         bootloader.
-         Sometimes for debug, we want to use a built-in dtb and then we needn't
-         modify bootloader at all.
-endmenu
+# dummy file, do not delete
index 67a4ae1fba2ba4601f689404e52d5ec3dd901ce1..c639fc167895d7a2f00909bf079e5ea2e6b0558c 100644 (file)
@@ -65,26 +65,15 @@ libs-y += arch/csky/lib/ \
        $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name)
 
 boot := arch/csky/boot
-ifneq '$(CONFIG_CSKY_BUILTIN_DTB)' '""'
 core-y += $(boot)/dts/
-endif
 
 all: zImage
 
-
-dtbs: scripts
-       $(Q)$(MAKE) $(build)=$(boot)/dts
-
-%.dtb %.dtb.S %.dtb.o: scripts
-       $(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@
-
-zImage Image uImage: vmlinux dtbs
+zImage Image uImage: vmlinux
        $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
 archclean:
        $(Q)$(MAKE) $(clean)=$(boot)
-       $(Q)$(MAKE) $(clean)=$(boot)/dts
-       rm -rf arch/csky/include/generated
 
 define archhelp
   echo  '* zImage       - Compressed kernel image (arch/$(ARCH)/boot/zImage)'
index 305e81a5e91e1e5a0d622f2f399e3511fe61ca07..c57ad3c880bfb933c227fa32141b57b938be3706 100644 (file)
@@ -1,13 +1,3 @@
 dtstree        := $(srctree)/$(src)
 
-ifneq '$(CONFIG_CSKY_BUILTIN_DTB)' '""'
-builtindtb-y := $(patsubst "%",%,$(CONFIG_CSKY_BUILTIN_DTB))
-dtb-y += $(builtindtb-y).dtb
-obj-y += $(builtindtb-y).dtb.o
-.SECONDARY: $(obj)/$(builtindtb-y).dtb.S
-else
 dtb-y := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
-endif
-
-always += $(dtb-y)
-clean-files += *.dtb *.dtb.S
index 6181e4134483c26aa1a34d55e4b316ddad98f5f5..fe3ddd73a0ccb9e4fec24425164cc8c6c7f477bc 100644 (file)
  */
 #ifdef CONFIG_SUN3
 #define PTRS_PER_PTE   16
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #define PTRS_PER_PMD   1
 #define PTRS_PER_PGD   2048
 #elif defined(CONFIG_COLDFIRE)
 #define PTRS_PER_PTE   512
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #define PTRS_PER_PMD   1
 #define PTRS_PER_PGD   1024
 #else
index f64ebb9c9a413535c105e3235eb50469d51b5697..e14b6621c933e47e1f87db0114f895b39f5450ef 100644 (file)
@@ -63,7 +63,7 @@ extern int mem_init_done;
 
 #include <asm-generic/4level-fixup.h>
 
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
index 15a84cfd07191f95dcaa83744f76feaae3a37af2..68410490e12fdc2497104c3eeb7cdf64a8e84b8e 100644 (file)
@@ -128,7 +128,7 @@ cflags-y += -ffreestanding
 # clang's output will be based upon the build machine. So for clang we simply
 # unconditionally specify -EB or -EL as appropriate.
 #
-ifeq ($(cc-name),clang)
+ifdef CONFIG_CC_IS_CLANG
 cflags-$(CONFIG_CPU_BIG_ENDIAN)                += -EB
 cflags-$(CONFIG_CPU_LITTLE_ENDIAN)     += -EL
 else
index 75108ec669ebc881c6949962ef61f6368c4a814a..6c79e8a16a2681f01cf4ffb0a702a8414499bf7b 100644 (file)
@@ -67,7 +67,7 @@ void (*cvmx_override_pko_queue_priority) (int pko_port,
 void (*cvmx_override_ipd_port_setup) (int ipd_port);
 
 /* Port count per interface */
-static int interface_port_count[5];
+static int interface_port_count[9];
 
 /**
  * Return the number of interfaces the chip has. Each interface
index e6c9485cadcffc7e0ecba01326ca3b777363edb4..cb38461391cb78c714535d2536b5cb4eed1bddad 100644 (file)
@@ -50,7 +50,7 @@ void *arch_dma_alloc(struct device *dev, size_t size,
        void *ret;
 
        ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
-       if (!ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) {
+       if (ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) {
                dma_cache_wback_inv((unsigned long) ret, size);
                ret = (void *)UNCAC_ADDR(ret);
        }
index 34605ca214984c7257507fa2fb5925eb48bd9f92..58a0315ad743d5bcf965814fd17431f3f2f2bf03 100644 (file)
@@ -10,7 +10,7 @@ ccflags-vdso := \
        $(filter -march=%,$(KBUILD_CFLAGS)) \
        -D__VDSO__
 
-ifeq ($(cc-name),clang)
+ifdef CONFIG_CC_IS_CLANG
 ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS))
 endif
 
index d3e19a55cf530046795f7c2836fbc13dc3b823fb..9f52db930c004ecc5c6de013721e06d7b4bf52a3 100644 (file)
@@ -4,7 +4,7 @@
 #ifndef _ASMNDS32_PGTABLE_H
 #define _ASMNDS32_PGTABLE_H
 
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #include <asm-generic/4level-fixup.h>
 #include <asm-generic/sizes.h>
 
index b941ac7d4e70b35181351565136a9c25e7ee66f0..c7bb74e22436079de3d9f6153e98fe47cf8a9df4 100644 (file)
@@ -111,7 +111,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 #if CONFIG_PGTABLE_LEVELS == 3
 #define BITS_PER_PMD   (PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY)
 #else
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #define BITS_PER_PMD   0
 #endif
 #define PTRS_PER_PMD    (1UL << BITS_PER_PMD)
index 2d51b2bd4aa132992f2f28be908b0571a79ae4d3..8be31261aec83190a65927854d214d613244c164 100644 (file)
@@ -930,10 +930,6 @@ config FSL_GTM
        help
          Freescale General-purpose Timers support
 
-# Yes MCA RS/6000s exist but Linux-PPC does not currently support any
-config MCA
-       bool
-
 # Platforms that what PCI turned unconditionally just do select PCI
 # in their config node.  Platforms that want to choose at config
 # time should select PPC_PCI_CHOICE
@@ -944,7 +940,6 @@ config PCI
        bool "PCI support" if PPC_PCI_CHOICE
        default y if !40x && !CPM2 && !PPC_8xx && !PPC_83xx \
                && !PPC_85xx && !PPC_86xx && !GAMECUBE_COMMON
-       default PCI_QSPAN if PPC_8xx
        select GENERIC_PCI_IOMAP
        help
          Find out whether your system includes a PCI bus. PCI is the name of
@@ -958,14 +953,6 @@ config PCI_DOMAINS
 config PCI_SYSCALL
        def_bool PCI
 
-config PCI_QSPAN
-       bool "QSpan PCI"
-       depends on PPC_8xx
-       select PPC_I8259
-       help
-         Say Y here if you have a system based on a Motorola 8xx-series
-         embedded processor with a QSPAN PCI interface, otherwise say N.
-
 config PCI_8260
        bool
        depends on PCI && 8260
index 17be664dafa2f2166d12025c8ee536d49c593b14..8a2ce14d68d077b2d62771070b40ac931d8a975a 100644 (file)
@@ -96,7 +96,7 @@ aflags-$(CONFIG_CPU_BIG_ENDIAN)               += $(call cc-option,-mabi=elfv1)
 aflags-$(CONFIG_CPU_LITTLE_ENDIAN)     += -mabi=elfv2
 endif
 
-ifneq ($(cc-name),clang)
+ifndef CONFIG_CC_IS_CLANG
   cflags-$(CONFIG_CPU_LITTLE_ENDIAN)   += -mno-strict-align
 endif
 
@@ -175,7 +175,7 @@ endif
 # Work around gcc code-gen bugs with -pg / -fno-omit-frame-pointer in gcc <= 4.8
 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=44199
 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52828
-ifneq ($(cc-name),clang)
+ifndef CONFIG_CC_IS_CLANG
 CC_FLAGS_FTRACE        += $(call cc-ifversion, -lt, 0409, -mno-sched-epilog)
 endif
 endif
index 55c0210a771d1f6e45bdd85a174d31efc2c99acd..092a400740f84ecfe11344fae29e7fb0927477d0 100644 (file)
                };
 
                ethernet@f0000 {
-                       phy-handle = <&xg_cs4315_phy1>;
+                       phy-handle = <&xg_cs4315_phy2>;
                        phy-connection-type = "xgmii";
                };
 
                ethernet@f2000 {
-                       phy-handle = <&xg_cs4315_phy2>;
+                       phy-handle = <&xg_cs4315_phy1>;
                        phy-connection-type = "xgmii";
                };
 
index 5b037f51741df177cfb26ea11dd1a1e9bb093290..3aa300afbbca4a4c3e07b4079a3b4b43d1ff7def 100644 (file)
@@ -72,7 +72,7 @@
                #address-cells = <1>;
                #size-cells = <1>;
                device_type = "soc";
-               ranges = <0x0 0xff000000 0x4000>;
+               ranges = <0x0 0xff000000 0x28000>;
                bus-frequency = <0>;
 
                // Temporary -- will go away once kernel uses ranges for get_immrbase().
                                #size-cells = <0>;
                        };
                };
+
+               crypto@20000 {
+                       compatible = "fsl,sec1.2", "fsl,sec1.0";
+                       reg = <0x20000 0x8000>;
+                       interrupts = <1 1>;
+                       interrupt-parent = <&PIC>;
+                       fsl,num-channels = <1>;
+                       fsl,channel-fifo-len = <24>;
+                       fsl,exec-units-mask = <0x4c>;
+                       fsl,descriptor-types-mask = <0x05000154>;
+               };
        };
 
        chosen {
index 31733a95bbd052bda1038f8160b127181d3cffc2..3d5acd2b113a2d64b12f5f8a1487483c2cc24159 100644 (file)
@@ -36,6 +36,11 @@ int raw_patch_instruction(unsigned int *addr, unsigned int instr);
 int patch_instruction_site(s32 *addr, unsigned int instr);
 int patch_branch_site(s32 *site, unsigned long target, int flags);
 
+static inline unsigned long patch_site_addr(s32 *site)
+{
+       return (unsigned long)site + *site;
+}
+
 int instr_is_relative_branch(unsigned int instr);
 int instr_is_relative_link_branch(unsigned int instr);
 int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr);
index 4f547752ae79595086c9ad55a44281ea68b52505..fa05aa566ece422971533399c8921ada1af35aec 100644 (file)
  * respectively NA for All or X for Supervisor and no access for User.
  * Then we use the APG to say whether accesses are according to Page rules or
  * "all Supervisor" rules (Access to all)
- * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP:
- * When that bit is not set access is done iaw "all user"
- * which means no access iaw page rules.
- * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED
- * 0x => No access => 11 (all accesses performed as user iaw page definition)
- * 10 => No user => 01 (all accesses performed according to page definition)
- * 11 => User => 00 (all accesses performed as supervisor iaw page definition)
+ * Therefore, we define 2 APG groups. lsb is _PMD_USER
+ * 0 => No user => 01 (all accesses performed according to page definition)
+ * 1 => User => 00 (all accesses performed as supervisor iaw page definition)
  * We define all 16 groups so that all other bits of APG can take any value
  */
-#ifdef CONFIG_SWAP
-#define MI_APG_INIT    0xf4f4f4f4
-#else
 #define MI_APG_INIT    0x44444444
-#endif
 
 /* The effective page number register.  When read, contains the information
  * about the last instruction TLB miss.  When MI_RPN is written, bits in
  * Supervisor and no access for user and NA for ALL.
  * Then we use the APG to say whether accesses are according to Page rules or
  * "all Supervisor" rules (Access to all)
- * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP:
- * When that bit is not set access is done iaw "all user"
- * which means no access iaw page rules.
- * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED
- * 0x => No access => 11 (all accesses performed as user iaw page definition)
- * 10 => No user => 01 (all accesses performed according to page definition)
- * 11 => User => 00 (all accesses performed as supervisor iaw page definition)
+ * Therefore, we define 2 APG groups. lsb is _PMD_USER
+ * 0 => No user => 01 (all accesses performed according to page definition)
+ * 1 => User => 00 (all accesses performed as supervisor iaw page definition)
  * We define all 16 groups so that all other bits of APG can take any value
  */
-#ifdef CONFIG_SWAP
-#define MD_APG_INIT    0xf4f4f4f4
-#else
 #define MD_APG_INIT    0x44444444
-#endif
 
 /* The effective page number register.  When read, contains the information
  * about the last instruction TLB miss.  When MD_RPN is written, bits in
  */
 #define SPRN_M_TW      799
 
-/* APGs */
-#define M_APG0         0x00000000
-#define M_APG1         0x00000020
-#define M_APG2         0x00000040
-#define M_APG3         0x00000060
-
 #ifdef CONFIG_PPC_MM_SLICES
 #include <asm/nohash/32/slice.h>
 #define SLICE_ARRAY_SIZE       (1 << (32 - SLICE_LOW_SHIFT - 1))
@@ -251,6 +229,15 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
        BUG();
 }
 
+/* patch sites */
+extern s32 patch__itlbmiss_linmem_top;
+extern s32 patch__dtlbmiss_linmem_top, patch__dtlbmiss_immr_jmp;
+extern s32 patch__fixupdar_linmem_top;
+
+extern s32 patch__itlbmiss_exit_1, patch__itlbmiss_exit_2;
+extern s32 patch__dtlbmiss_exit_1, patch__dtlbmiss_exit_2, patch__dtlbmiss_exit_3;
+extern s32 patch__itlbmiss_perf, patch__dtlbmiss_perf;
+
 #endif /* !__ASSEMBLY__ */
 
 #if defined(CONFIG_PPC_4K_PAGES)
index bb38dd67d47ddba7d730eb57b5d0cf1aac30093a..1b06add4f092adb5811be46bc4b6011e5537c591 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/spinlock.h>
 #include <asm/page.h>
 #include <linux/time.h>
+#include <linux/cpumask.h>
 
 /*
  * Definitions for talking to the RTAS on CHRP machines.
index 134a573a9f2d0c61f7bf0d789826b6bdccb989f0..3b67b9533c82fe1cfee4f279cdfd1a3897f873d7 100644 (file)
@@ -31,6 +31,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/ptrace.h>
 #include <asm/export.h>
+#include <asm/code-patching-asm.h>
 
 #if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
 /* By simply checking Address >= 0x80000000, we know if its a kernel address */
@@ -318,8 +319,8 @@ InstructionTLBMiss:
        cmpli   cr0, r11, PAGE_OFFSET@h
 #ifndef CONFIG_PIN_TLB_TEXT
        /* It is assumed that kernel code fits into the first 8M page */
-_ENTRY(ITLBMiss_cmp)
-       cmpli   cr7, r11, (PAGE_OFFSET + 0x0800000)@h
+0:     cmpli   cr7, r11, (PAGE_OFFSET + 0x0800000)@h
+       patch_site      0b, patch__itlbmiss_linmem_top
 #endif
 #endif
 #endif
@@ -353,13 +354,14 @@ _ENTRY(ITLBMiss_cmp)
 #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
        mtcr    r12
 #endif
-
-#ifdef CONFIG_SWAP
-       rlwinm  r11, r10, 31, _PAGE_ACCESSED >> 1
-#endif
        /* Load the MI_TWC with the attributes for this "segment." */
        mtspr   SPRN_MI_TWC, r11        /* Set segment attributes */
 
+#ifdef CONFIG_SWAP
+       rlwinm  r11, r10, 32-5, _PAGE_PRESENT
+       and     r11, r11, r10
+       rlwimi  r10, r11, 0, _PAGE_PRESENT
+#endif
        li      r11, RPN_PATTERN | 0x200
        /* The Linux PTE won't go exactly into the MMU TLB.
         * Software indicator bits 20 and 23 must be clear.
@@ -372,16 +374,17 @@ _ENTRY(ITLBMiss_cmp)
        mtspr   SPRN_MI_RPN, r10        /* Update TLB entry */
 
        /* Restore registers */
-_ENTRY(itlb_miss_exit_1)
-       mfspr   r10, SPRN_SPRG_SCRATCH0
+0:     mfspr   r10, SPRN_SPRG_SCRATCH0
        mfspr   r11, SPRN_SPRG_SCRATCH1
 #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
        mfspr   r12, SPRN_SPRG_SCRATCH2
 #endif
        rfi
+       patch_site      0b, patch__itlbmiss_exit_1
+
 #ifdef CONFIG_PERF_EVENTS
-_ENTRY(itlb_miss_perf)
-       lis     r10, (itlb_miss_counter - PAGE_OFFSET)@ha
+       patch_site      0f, patch__itlbmiss_perf
+0:     lis     r10, (itlb_miss_counter - PAGE_OFFSET)@ha
        lwz     r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
        addi    r11, r11, 1
        stw     r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
@@ -435,11 +438,11 @@ DataStoreTLBMiss:
 #ifndef CONFIG_PIN_TLB_IMMR
        cmpli   cr0, r11, VIRT_IMMR_BASE@h
 #endif
-_ENTRY(DTLBMiss_cmp)
-       cmpli   cr7, r11, (PAGE_OFFSET + 0x1800000)@h
+0:     cmpli   cr7, r11, (PAGE_OFFSET + 0x1800000)@h
+       patch_site      0b, patch__dtlbmiss_linmem_top
 #ifndef CONFIG_PIN_TLB_IMMR
-_ENTRY(DTLBMiss_jmp)
-       beq-    DTLBMissIMMR
+0:     beq-    DTLBMissIMMR
+       patch_site      0b, patch__dtlbmiss_immr_jmp
 #endif
        blt     cr7, DTLBMissLinear
        lis     r11, (swapper_pg_dir-PAGE_OFFSET)@ha
@@ -470,14 +473,22 @@ _ENTRY(DTLBMiss_jmp)
         * above.
         */
        rlwimi  r11, r10, 0, _PAGE_GUARDED
-#ifdef CONFIG_SWAP
-       /* _PAGE_ACCESSED has to be set. We use second APG bit for that, 0
-        * on that bit will represent a Non Access group
-        */
-       rlwinm  r11, r10, 31, _PAGE_ACCESSED >> 1
-#endif
        mtspr   SPRN_MD_TWC, r11
 
+       /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set.
+        * We also need to know if the insn is a load/store, so:
+        * Clear _PAGE_PRESENT and load that which will
+        * trap into DTLB Error with store bit set accordinly.
+        */
+       /* PRESENT=0x1, ACCESSED=0x20
+        * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
+        * r10 = (r10 & ~PRESENT) | r11;
+        */
+#ifdef CONFIG_SWAP
+       rlwinm  r11, r10, 32-5, _PAGE_PRESENT
+       and     r11, r11, r10
+       rlwimi  r10, r11, 0, _PAGE_PRESENT
+#endif
        /* The Linux PTE won't go exactly into the MMU TLB.
         * Software indicator bits 24, 25, 26, and 27 must be
         * set.  All other Linux PTE bits control the behavior
@@ -489,14 +500,16 @@ _ENTRY(DTLBMiss_jmp)
 
        /* Restore registers */
        mtspr   SPRN_DAR, r11   /* Tag DAR */
-_ENTRY(dtlb_miss_exit_1)
-       mfspr   r10, SPRN_SPRG_SCRATCH0
+
+0:     mfspr   r10, SPRN_SPRG_SCRATCH0
        mfspr   r11, SPRN_SPRG_SCRATCH1
        mfspr   r12, SPRN_SPRG_SCRATCH2
        rfi
+       patch_site      0b, patch__dtlbmiss_exit_1
+
 #ifdef CONFIG_PERF_EVENTS
-_ENTRY(dtlb_miss_perf)
-       lis     r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
+       patch_site      0f, patch__dtlbmiss_perf
+0:     lis     r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
        lwz     r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
        addi    r11, r11, 1
        stw     r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
@@ -637,8 +650,8 @@ InstructionBreakpoint:
  */
 DTLBMissIMMR:
        mtcr    r12
-       /* Set 512k byte guarded page and mark it valid and accessed */
-       li      r10, MD_PS512K | MD_GUARDED | MD_SVALID | M_APG2
+       /* Set 512k byte guarded page and mark it valid */
+       li      r10, MD_PS512K | MD_GUARDED | MD_SVALID
        mtspr   SPRN_MD_TWC, r10
        mfspr   r10, SPRN_IMMR                  /* Get current IMMR */
        rlwinm  r10, r10, 0, 0xfff80000         /* Get 512 kbytes boundary */
@@ -648,16 +661,17 @@ DTLBMissIMMR:
 
        li      r11, RPN_PATTERN
        mtspr   SPRN_DAR, r11   /* Tag DAR */
-_ENTRY(dtlb_miss_exit_2)
-       mfspr   r10, SPRN_SPRG_SCRATCH0
+
+0:     mfspr   r10, SPRN_SPRG_SCRATCH0
        mfspr   r11, SPRN_SPRG_SCRATCH1
        mfspr   r12, SPRN_SPRG_SCRATCH2
        rfi
+       patch_site      0b, patch__dtlbmiss_exit_2
 
 DTLBMissLinear:
        mtcr    r12
-       /* Set 8M byte page and mark it valid and accessed */
-       li      r11, MD_PS8MEG | MD_SVALID | M_APG2
+       /* Set 8M byte page and mark it valid */
+       li      r11, MD_PS8MEG | MD_SVALID
        mtspr   SPRN_MD_TWC, r11
        rlwinm  r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
        ori     r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
@@ -666,28 +680,29 @@ DTLBMissLinear:
 
        li      r11, RPN_PATTERN
        mtspr   SPRN_DAR, r11   /* Tag DAR */
-_ENTRY(dtlb_miss_exit_3)
-       mfspr   r10, SPRN_SPRG_SCRATCH0
+
+0:     mfspr   r10, SPRN_SPRG_SCRATCH0
        mfspr   r11, SPRN_SPRG_SCRATCH1
        mfspr   r12, SPRN_SPRG_SCRATCH2
        rfi
+       patch_site      0b, patch__dtlbmiss_exit_3
 
 #ifndef CONFIG_PIN_TLB_TEXT
 ITLBMissLinear:
        mtcr    r12
-       /* Set 8M byte page and mark it valid,accessed */
-       li      r11, MI_PS8MEG | MI_SVALID | M_APG2
+       /* Set 8M byte page and mark it valid */
+       li      r11, MI_PS8MEG | MI_SVALID
        mtspr   SPRN_MI_TWC, r11
        rlwinm  r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
        ori     r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
                          _PAGE_PRESENT
        mtspr   SPRN_MI_RPN, r10        /* Update TLB entry */
 
-_ENTRY(itlb_miss_exit_2)
-       mfspr   r10, SPRN_SPRG_SCRATCH0
+0:     mfspr   r10, SPRN_SPRG_SCRATCH0
        mfspr   r11, SPRN_SPRG_SCRATCH1
        mfspr   r12, SPRN_SPRG_SCRATCH2
        rfi
+       patch_site      0b, patch__itlbmiss_exit_2
 #endif
 
 /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
@@ -705,8 +720,10 @@ FixupDAR:/* Entry point for dcbx workaround. */
        mfspr   r11, SPRN_M_TW  /* Get level 1 table */
        blt+    3f
        rlwinm  r11, r10, 16, 0xfff8
-_ENTRY(FixupDAR_cmp)
-       cmpli   cr7, r11, (PAGE_OFFSET + 0x1800000)@h
+
+0:     cmpli   cr7, r11, (PAGE_OFFSET + 0x1800000)@h
+       patch_site      0b, patch__fixupdar_linmem_top
+
        /* create physical page address from effective address */
        tophys(r11, r10)
        blt-    cr7, 201f
@@ -960,7 +977,7 @@ initial_mmu:
        ori     r8, r8, MI_EVALID       /* Mark it valid */
        mtspr   SPRN_MI_EPN, r8
        li      r8, MI_PS8MEG /* Set 8M byte page */
-       ori     r8, r8, MI_SVALID | M_APG2      /* Make it valid, APG 2 */
+       ori     r8, r8, MI_SVALID       /* Make it valid */
        mtspr   SPRN_MI_TWC, r8
        li      r8, MI_BOOTINIT         /* Create RPN for address 0 */
        mtspr   SPRN_MI_RPN, r8         /* Store TLB entry */
@@ -987,7 +1004,7 @@ initial_mmu:
        ori     r8, r8, MD_EVALID       /* Mark it valid */
        mtspr   SPRN_MD_EPN, r8
        li      r8, MD_PS512K | MD_GUARDED      /* Set 512k byte page */
-       ori     r8, r8, MD_SVALID | M_APG2      /* Make it valid and accessed */
+       ori     r8, r8, MD_SVALID       /* Make it valid */
        mtspr   SPRN_MD_TWC, r8
        mr      r8, r9                  /* Create paddr for TLB */
        ori     r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */
index 4d5322cfad25c7b543cc2e80c1f355f2d75030fc..96f34730010fe3f5f778400a14a7a470d4d38142 100644 (file)
@@ -590,12 +590,11 @@ void flush_all_to_thread(struct task_struct *tsk)
        if (tsk->thread.regs) {
                preempt_disable();
                BUG_ON(tsk != current);
-               save_all(tsk);
-
 #ifdef CONFIG_SPE
                if (tsk->thread.regs->msr & MSR_SPE)
                        tsk->thread.spefscr = mfspr(SPRN_SPEFSCR);
 #endif
+               save_all(tsk);
 
                preempt_enable();
        }
index bf8def2159c31e3e921394464e1491a5097f23b4..d65b961661fbf6d9075b34c523269bb1261845fe 100644 (file)
@@ -2337,8 +2337,7 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
                kvmppc_core_prepare_to_enter(vcpu);
                return;
        }
-       dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
-                  / tb_ticks_per_sec;
+       dec_nsec = tb_to_ns(vcpu->arch.dec_expires - now);
        hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL);
        vcpu->arch.timer_running = 1;
 }
index fa888bfc347e6e6e10055cd7b2e36a6c5c4ecf2b..9f5b8c01c4e165a969d3317be6b5c5b9ae71b2f9 100644 (file)
@@ -61,11 +61,10 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 
        dec_time = vcpu->arch.dec;
        /*
-        * Guest timebase ticks at the same frequency as host decrementer.
-        * So use the host decrementer calculations for decrementer emulation.
+        * Guest timebase ticks at the same frequency as host timebase.
+        * So use the host timebase calculations for decrementer emulation.
         */
-       dec_time = dec_time << decrementer_clockevent.shift;
-       do_div(dec_time, decrementer_clockevent.mult);
+       dec_time = tb_to_ns(dec_time);
        dec_nsec = do_div(dec_time, NSEC_PER_SEC);
        hrtimer_start(&vcpu->arch.dec_timer,
                ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL);
index 36484a2ef9158e39b8a0fb46f84e7d070753f090..01b7f5107c3a32d0b4d4e627b10ecd0fbd380bec 100644 (file)
@@ -13,6 +13,7 @@
  */
 
 #include <linux/memblock.h>
+#include <linux/mmu_context.h>
 #include <asm/fixmap.h>
 #include <asm/code-patching.h>
 
@@ -79,7 +80,7 @@ void __init MMU_init_hw(void)
        for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) {
                mtspr(SPRN_MD_CTR, ctr | (i << 8));
                mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID);
-               mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID | M_APG2);
+               mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID);
                mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT);
                addr += LARGE_PAGE_SIZE_8M;
                mem -= LARGE_PAGE_SIZE_8M;
@@ -97,22 +98,13 @@ static void __init mmu_mapin_immr(void)
                map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG);
 }
 
-/* Address of instructions to patch */
-#ifndef CONFIG_PIN_TLB_IMMR
-extern unsigned int DTLBMiss_jmp;
-#endif
-extern unsigned int DTLBMiss_cmp, FixupDAR_cmp;
-#ifndef CONFIG_PIN_TLB_TEXT
-extern unsigned int ITLBMiss_cmp;
-#endif
-
-static void __init mmu_patch_cmp_limit(unsigned int *addr, unsigned long mapped)
+static void __init mmu_patch_cmp_limit(s32 *site, unsigned long mapped)
 {
-       unsigned int instr = *addr;
+       unsigned int instr = *(unsigned int *)patch_site_addr(site);
 
        instr &= 0xffff0000;
        instr |= (unsigned long)__va(mapped) >> 16;
-       patch_instruction(addr, instr);
+       patch_instruction_site(site, instr);
 }
 
 unsigned long __init mmu_mapin_ram(unsigned long top)
@@ -123,17 +115,17 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
                mapped = 0;
                mmu_mapin_immr();
 #ifndef CONFIG_PIN_TLB_IMMR
-               patch_instruction(&DTLBMiss_jmp, PPC_INST_NOP);
+               patch_instruction_site(&patch__dtlbmiss_immr_jmp, PPC_INST_NOP);
 #endif
 #ifndef CONFIG_PIN_TLB_TEXT
-               mmu_patch_cmp_limit(&ITLBMiss_cmp, 0);
+               mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0);
 #endif
        } else {
                mapped = top & ~(LARGE_PAGE_SIZE_8M - 1);
        }
 
-       mmu_patch_cmp_limit(&DTLBMiss_cmp, mapped);
-       mmu_patch_cmp_limit(&FixupDAR_cmp, mapped);
+       mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped);
+       mmu_patch_cmp_limit(&patch__fixupdar_linmem_top, mapped);
 
        /* If the size of RAM is not an exact power of two, we may not
         * have covered RAM in its entirety with 8 MiB
index 6c0020d1c5614c6610faa359ef87254e3a4c8107..e38f74e9e7a4aec85166bdcd0eeefe529c731bc9 100644 (file)
@@ -31,9 +31,6 @@
 
 extern unsigned long itlb_miss_counter, dtlb_miss_counter;
 extern atomic_t instruction_counter;
-extern unsigned int itlb_miss_perf, dtlb_miss_perf;
-extern unsigned int itlb_miss_exit_1, itlb_miss_exit_2;
-extern unsigned int dtlb_miss_exit_1, dtlb_miss_exit_2, dtlb_miss_exit_3;
 
 static atomic_t insn_ctr_ref;
 static atomic_t itlb_miss_ref;
@@ -103,22 +100,22 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags)
                break;
        case PERF_8xx_ID_ITLB_LOAD_MISS:
                if (atomic_inc_return(&itlb_miss_ref) == 1) {
-                       unsigned long target = (unsigned long)&itlb_miss_perf;
+                       unsigned long target = patch_site_addr(&patch__itlbmiss_perf);
 
-                       patch_branch(&itlb_miss_exit_1, target, 0);
+                       patch_branch_site(&patch__itlbmiss_exit_1, target, 0);
 #ifndef CONFIG_PIN_TLB_TEXT
-                       patch_branch(&itlb_miss_exit_2, target, 0);
+                       patch_branch_site(&patch__itlbmiss_exit_2, target, 0);
 #endif
                }
                val = itlb_miss_counter;
                break;
        case PERF_8xx_ID_DTLB_LOAD_MISS:
                if (atomic_inc_return(&dtlb_miss_ref) == 1) {
-                       unsigned long target = (unsigned long)&dtlb_miss_perf;
+                       unsigned long target = patch_site_addr(&patch__dtlbmiss_perf);
 
-                       patch_branch(&dtlb_miss_exit_1, target, 0);
-                       patch_branch(&dtlb_miss_exit_2, target, 0);
-                       patch_branch(&dtlb_miss_exit_3, target, 0);
+                       patch_branch_site(&patch__dtlbmiss_exit_1, target, 0);
+                       patch_branch_site(&patch__dtlbmiss_exit_2, target, 0);
+                       patch_branch_site(&patch__dtlbmiss_exit_3, target, 0);
                }
                val = dtlb_miss_counter;
                break;
@@ -180,17 +177,17 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags)
                break;
        case PERF_8xx_ID_ITLB_LOAD_MISS:
                if (atomic_dec_return(&itlb_miss_ref) == 0) {
-                       patch_instruction(&itlb_miss_exit_1, insn);
+                       patch_instruction_site(&patch__itlbmiss_exit_1, insn);
 #ifndef CONFIG_PIN_TLB_TEXT
-                       patch_instruction(&itlb_miss_exit_2, insn);
+                       patch_instruction_site(&patch__itlbmiss_exit_2, insn);
 #endif
                }
                break;
        case PERF_8xx_ID_DTLB_LOAD_MISS:
                if (atomic_dec_return(&dtlb_miss_ref) == 0) {
-                       patch_instruction(&dtlb_miss_exit_1, insn);
-                       patch_instruction(&dtlb_miss_exit_2, insn);
-                       patch_instruction(&dtlb_miss_exit_3, insn);
+                       patch_instruction_site(&patch__dtlbmiss_exit_1, insn);
+                       patch_instruction_site(&patch__dtlbmiss_exit_2, insn);
+                       patch_instruction_site(&patch__dtlbmiss_exit_3, insn);
                }
                break;
        }
index 2a9d66254ffc58c92b9d86f663719d646734f342..5326ece361204054992c503b3540465bc6bda512 100644 (file)
@@ -29,6 +29,7 @@ config KILAUEA
        select 405EX
        select PPC40x_SIMPLE
        select PPC4xx_PCI_EXPRESS
+       select PCI
        select PCI_MSI
        select PPC4xx_MSI
        help
index f024efd5a4c2061b6989beaa4d4cda98f5e65f9e..9a85d350b1b6c7b36418b661d4fb81ccc742fa58 100644 (file)
@@ -21,6 +21,7 @@ config BLUESTONE
        depends on 44x
        select PPC44x_SIMPLE
        select APM821xx
+       select PCI
        select PCI_MSI
        select PPC4xx_MSI
        select PPC4xx_PCI_EXPRESS
@@ -200,6 +201,7 @@ config AKEBONO
        select SWIOTLB
        select 476FPE
        select PPC4xx_PCI_EXPRESS
+       select PCI
        select PCI_MSI
        select PPC4xx_HSTA_MSI
        select I2C
index 8bd590af488a1a1c379476a77dae5eb3809c44e3..794487313cc8d252af91e3ed113e51c624cff3d9 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
+#include <linux/hugetlb.h>
 #include <asm/lppaca.h>
 #include <asm/hvcall.h>
 #include <asm/firmware.h>
@@ -36,6 +37,7 @@
 #include <asm/vio.h>
 #include <asm/mmu.h>
 #include <asm/machdep.h>
+#include <asm/drmem.h>
 
 #include "pseries.h"
 
@@ -433,6 +435,16 @@ static void parse_em_data(struct seq_file *m)
                seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]);
 }
 
+static void maxmem_data(struct seq_file *m)
+{
+       unsigned long maxmem = 0;
+
+       maxmem += drmem_info->n_lmbs * drmem_info->lmb_size;
+       maxmem += hugetlb_total_pages() * PAGE_SIZE;
+
+       seq_printf(m, "MaxMem=%ld\n", maxmem);
+}
+
 static int pseries_lparcfg_data(struct seq_file *m, void *v)
 {
        int partition_potential_processors;
@@ -491,6 +503,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
        seq_printf(m, "slb_size=%d\n", mmu_slb_size);
 #endif
        parse_em_data(m);
+       maxmem_data(m);
 
        return 0;
 }
index 69e7fb47bcaa3e39c81f017214e388e450b2ac2d..878f9c1d36150c80a021413f23ac36577ef881c0 100644 (file)
@@ -11,6 +11,12 @@ UBSAN_SANITIZE := n
 ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
 
+ifdef CONFIG_CC_IS_CLANG
+# clang stores addresses on the stack causing the frame size to blow
+# out. See https://github.com/ClangBuiltLinux/linux/issues/252
+KBUILD_CFLAGS += -Wframe-larger-than=4096
+endif
+
 ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
 
 obj-y                  += xmon.o nonstdio.o spr_access.o
index 36473d7dbaac4a602096414e20d2cad2ed296358..07fa9ea75fea1f1c72caaa6f246c28df86b6a532 100644 (file)
@@ -1,6 +1,3 @@
-CONFIG_SMP=y
-CONFIG_PCI=y
-CONFIG_PCIE_XILINX=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_IKCONFIG=y
@@ -11,10 +8,15 @@ CONFIG_CFS_BANDWIDTH=y
 CONFIG_CGROUP_BPF=y
 CONFIG_NAMESPACES=y
 CONFIG_USER_NS=y
+CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
-CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_BPF_SYSCALL=y
+CONFIG_SMP=y
+CONFIG_PCI=y
+CONFIG_PCIE_XILINX=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -59,6 +61,7 @@ CONFIG_USB_OHCI_HCD_PLATFORM=y
 CONFIG_USB_STORAGE=y
 CONFIG_USB_UAS=y
 CONFIG_VIRTIO_MMIO=y
+CONFIG_SIFIVE_PLIC=y
 CONFIG_RAS=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
@@ -72,8 +75,5 @@ CONFIG_NFS_V4=y
 CONFIG_NFS_V4_1=y
 CONFIG_NFS_V4_2=y
 CONFIG_ROOT_NFS=y
-# CONFIG_RCU_TRACE is not set
 CONFIG_CRYPTO_USER_API_HASH=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_SIFIVE_PLIC=y
+# CONFIG_RCU_TRACE is not set
index 0b33577932c3bd9c552c62cfe473979987c97313..e21053e5e0da2a06c3ba78e9967e55837ecaddc0 100644 (file)
@@ -27,7 +27,7 @@ KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding)
 KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g)
 KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,))
 UTS_MACHINE    := s390x
-STACK_SIZE     := $(if $(CONFIG_KASAN),32768,16384)
+STACK_SIZE     := $(if $(CONFIG_KASAN),65536,16384)
 CHECKFLAGS     += -D__s390__ -D__s390x__
 
 export LD_BFD
index 593039620487a6cdad8e076272b8e97cacff0153..b1bdd15e3429f39d50b0c8e73896c5539a4cfc5e 100644 (file)
@@ -22,10 +22,10 @@ OBJCOPYFLAGS :=
 OBJECTS := $(addprefix $(obj)/,$(obj-y))
 
 LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T
-$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS)
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) FORCE
        $(call if_changed,ld)
 
-OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info
+OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load
 $(obj)/info.bin: vmlinux FORCE
        $(call if_changed,objcopy)
 
@@ -46,17 +46,17 @@ suffix-$(CONFIG_KERNEL_LZMA)  := .lzma
 suffix-$(CONFIG_KERNEL_LZO)  := .lzo
 suffix-$(CONFIG_KERNEL_XZ)  := .xz
 
-$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
        $(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
        $(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
        $(call if_changed,lz4)
-$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
        $(call if_changed,lzma)
-$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
        $(call if_changed,lzo)
-$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
        $(call if_changed,xzkern)
 
 OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed
index 259d1698ac50a468021e17a6a2fbe93526f520f2..c69cb04b7a5948e56535a145cb788de06fa4bed8 100644 (file)
@@ -64,6 +64,8 @@ CONFIG_NUMA=y
 CONFIG_PREEMPT=y
 CONFIG_HZ_100=y
 CONFIG_KEXEC_FILE=y
+CONFIG_EXPOLINE=y
+CONFIG_EXPOLINE_AUTO=y
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_KSM=y
@@ -84,9 +86,11 @@ CONFIG_PCI_DEBUG=y
 CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_S390=y
 CONFIG_CHSC_SCH=y
+CONFIG_VFIO_AP=m
 CONFIG_CRASH_DUMP=y
 CONFIG_BINFMT_MISC=m
 CONFIG_HIBERNATION=y
+CONFIG_PM_DEBUG=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
@@ -161,8 +165,6 @@ CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_CT_NETLINK=m
 CONFIG_NF_CT_NETLINK_TIMEOUT=m
 CONFIG_NF_TABLES=m
-CONFIG_NFT_EXTHDR=m
-CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_COUNTER=m
 CONFIG_NFT_LOG=m
@@ -365,6 +367,8 @@ CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_OPENVSWITCH=m
+CONFIG_VSOCKETS=m
+CONFIG_VIRTIO_VSOCKETS=m
 CONFIG_NETLINK_DIAG=m
 CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
@@ -461,6 +465,7 @@ CONFIG_PPTP=m
 CONFIG_PPPOL2TP=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
+CONFIG_ISM=m
 CONFIG_INPUT_EVDEV=y
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
@@ -486,9 +491,12 @@ CONFIG_MLX4_INFINIBAND=m
 CONFIG_MLX5_INFINIBAND=m
 CONFIG_VFIO=m
 CONFIG_VFIO_PCI=m
+CONFIG_VFIO_MDEV=m
+CONFIG_VFIO_MDEV_DEVICE=m
 CONFIG_VIRTIO_PCI=m
 CONFIG_VIRTIO_BALLOON=m
 CONFIG_VIRTIO_INPUT=y
+CONFIG_S390_AP_IOMMU=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
@@ -615,7 +623,6 @@ CONFIG_DEBUG_CREDENTIALS=y
 CONFIG_RCU_TORTURE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=300
 CONFIG_NOTIFIER_ERROR_INJECTION=m
-CONFIG_PM_NOTIFIER_ERROR_INJECT=m
 CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m
 CONFIG_FAULT_INJECTION=y
 CONFIG_FAILSLAB=y
@@ -727,3 +734,4 @@ CONFIG_APPLDATA_BASE=y
 CONFIG_KVM=m
 CONFIG_KVM_S390_UCONTROL=y
 CONFIG_VHOST_NET=m
+CONFIG_VHOST_VSOCK=m
index 37fd60c20e22dec8cd8452baaf89135debccf735..32f539dc9c19240d589a5cb62fb51e0a30d9baf5 100644 (file)
@@ -65,6 +65,8 @@ CONFIG_NR_CPUS=512
 CONFIG_NUMA=y
 CONFIG_HZ_100=y
 CONFIG_KEXEC_FILE=y
+CONFIG_EXPOLINE=y
+CONFIG_EXPOLINE_AUTO=y
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_KSM=y
@@ -82,9 +84,11 @@ CONFIG_PCI=y
 CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_S390=y
 CONFIG_CHSC_SCH=y
+CONFIG_VFIO_AP=m
 CONFIG_CRASH_DUMP=y
 CONFIG_BINFMT_MISC=m
 CONFIG_HIBERNATION=y
+CONFIG_PM_DEBUG=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
@@ -159,8 +163,6 @@ CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_CT_NETLINK=m
 CONFIG_NF_CT_NETLINK_TIMEOUT=m
 CONFIG_NF_TABLES=m
-CONFIG_NFT_EXTHDR=m
-CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_COUNTER=m
 CONFIG_NFT_LOG=m
@@ -362,6 +364,8 @@ CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_OPENVSWITCH=m
+CONFIG_VSOCKETS=m
+CONFIG_VIRTIO_VSOCKETS=m
 CONFIG_NETLINK_DIAG=m
 CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
@@ -458,6 +462,7 @@ CONFIG_PPTP=m
 CONFIG_PPPOL2TP=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
+CONFIG_ISM=m
 CONFIG_INPUT_EVDEV=y
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
@@ -483,9 +488,12 @@ CONFIG_MLX4_INFINIBAND=m
 CONFIG_MLX5_INFINIBAND=m
 CONFIG_VFIO=m
 CONFIG_VFIO_PCI=m
+CONFIG_VFIO_MDEV=m
+CONFIG_VFIO_MDEV_DEVICE=m
 CONFIG_VIRTIO_PCI=m
 CONFIG_VIRTIO_BALLOON=m
 CONFIG_VIRTIO_INPUT=y
+CONFIG_S390_AP_IOMMU=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
@@ -666,3 +674,4 @@ CONFIG_APPLDATA_BASE=y
 CONFIG_KVM=m
 CONFIG_KVM_S390_UCONTROL=y
 CONFIG_VHOST_NET=m
+CONFIG_VHOST_VSOCK=m
index 7cb6a52f727dafc6c994423b0db21ccafec4993a..4d58a92b5d979f15e3469240c47a8e6f5fc4c189 100644 (file)
@@ -26,14 +26,23 @@ CONFIG_CGROUP_CPUACCT=y
 CONFIG_CGROUP_PERF=y
 CONFIG_NAMESPACES=y
 CONFIG_USER_NS=y
+CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
 # CONFIG_SYSFS_SYSCALL is not set
-CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_BPF_SYSCALL=y
 CONFIG_USERFAULTFD=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_PROFILING=y
+CONFIG_LIVEPATCH=y
+CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
+CONFIG_HZ_100=y
+CONFIG_KEXEC_FILE=y
+CONFIG_CRASH_DUMP=y
+CONFIG_HIBERNATION=y
+CONFIG_PM_DEBUG=y
+CONFIG_CMM=m
 CONFIG_OPROFILE=y
 CONFIG_KPROBES=y
 CONFIG_JUMP_LABEL=y
@@ -44,11 +53,7 @@ CONFIG_BLK_DEV_INTEGRITY=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_IBM_PARTITION=y
 CONFIG_DEFAULT_DEADLINE=y
-CONFIG_LIVEPATCH=y
-CONFIG_NR_CPUS=256
-CONFIG_NUMA=y
-CONFIG_HZ_100=y
-CONFIG_KEXEC_FILE=y
+CONFIG_BINFMT_MISC=m
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_KSM=y
@@ -60,9 +65,6 @@ CONFIG_ZBUD=m
 CONFIG_ZSMALLOC=m
 CONFIG_ZSMALLOC_STAT=y
 CONFIG_IDLE_PAGE_TRACKING=y
-CONFIG_CRASH_DUMP=y
-CONFIG_BINFMT_MISC=m
-CONFIG_HIBERNATION=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -98,6 +100,7 @@ CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_VIRTIO_BLK=y
 CONFIG_SCSI=y
+# CONFIG_SCSI_MQ_DEFAULT is not set
 CONFIG_BLK_DEV_SD=y
 CONFIG_CHR_DEV_ST=y
 CONFIG_BLK_DEV_SR=y
@@ -131,6 +134,7 @@ CONFIG_EQUALIZER=m
 CONFIG_TUN=m
 CONFIG_VIRTIO_NET=y
 # CONFIG_NET_VENDOR_ALACRITECH is not set
+# CONFIG_NET_VENDOR_AURORA is not set
 # CONFIG_NET_VENDOR_CORTINA is not set
 # CONFIG_NET_VENDOR_SOLARFLARE is not set
 # CONFIG_NET_VENDOR_SOCIONEXT is not set
@@ -157,33 +161,6 @@ CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
 CONFIG_HUGETLBFS=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_INFO_DWARF4=y
-CONFIG_GDB_SCRIPTS=y
-CONFIG_UNUSED_SYMBOLS=y
-CONFIG_DEBUG_SECTION_MISMATCH=y
-CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_PAGEALLOC=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_PANIC_ON_OOPS=y
-CONFIG_PROVE_LOCKING=y
-CONFIG_LOCK_STAT=y
-CONFIG_DEBUG_LOCKDEP=y
-CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_LIST=y
-CONFIG_DEBUG_SG=y
-CONFIG_DEBUG_NOTIFIERS=y
-CONFIG_RCU_CPU_STALL_TIMEOUT=60
-CONFIG_LATENCYTOP=y
-CONFIG_SCHED_TRACER=y
-CONFIG_FTRACE_SYSCALLS=y
-CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
-CONFIG_STACK_TRACER=y
-CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_FUNCTION_PROFILER=y
-# CONFIG_RUNTIME_TESTING_MENU is not set
-CONFIG_S390_PTDUMP=y
 CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_AUTHENC=m
 CONFIG_CRYPTO_TEST=m
@@ -193,6 +170,7 @@ CONFIG_CRYPTO_CBC=y
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_OFB=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_XTS=m
 CONFIG_CRYPTO_CMAC=m
@@ -231,7 +209,6 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_ZCRYPT=m
-CONFIG_ZCRYPT_MULTIDEVNODES=y
 CONFIG_PKEY=m
 CONFIG_CRYPTO_PAES_S390=m
 CONFIG_CRYPTO_SHA1_S390=m
@@ -247,4 +224,30 @@ CONFIG_CRC7=m
 # CONFIG_XZ_DEC_ARM is not set
 # CONFIG_XZ_DEC_ARMTHUMB is not set
 # CONFIG_XZ_DEC_SPARC is not set
-CONFIG_CMM=m
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_GDB_SCRIPTS=y
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCK_STAT=y
+CONFIG_DEBUG_LOCKDEP=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_SG=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_LATENCYTOP=y
+CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
+CONFIG_STACK_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_FUNCTION_PROFILER=y
+# CONFIG_RUNTIME_TESTING_MENU is not set
+CONFIG_S390_PTDUMP=y
index dbd689d556ce5dd9368392a1e0676c18163acc3c..ccbb53e2202404b85aae86e883d3e64405d2d305 100644 (file)
@@ -46,8 +46,6 @@ static inline int init_new_context(struct task_struct *tsk,
                mm->context.asce_limit = STACK_TOP_MAX;
                mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
                                   _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
-               /* pgd_alloc() did not account this pud */
-               mm_inc_nr_puds(mm);
                break;
        case -PAGE_SIZE:
                /* forked 5-level task, set new asce with new_mm->pgd */
@@ -63,9 +61,6 @@ static inline int init_new_context(struct task_struct *tsk,
                /* forked 2-level compat task, set new asce with new mm->pgd */
                mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
                                   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
-               /* pgd_alloc() did not account this pmd */
-               mm_inc_nr_pmds(mm);
-               mm_inc_nr_puds(mm);
        }
        crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
        return 0;
index f0f9bcf94c03749b0f0030d9de5765cff1597d37..5ee733720a5716b2308210d497f9c8ab73485cfa 100644 (file)
@@ -36,11 +36,11 @@ static inline void crst_table_init(unsigned long *crst, unsigned long entry)
 
 static inline unsigned long pgd_entry_type(struct mm_struct *mm)
 {
-       if (mm->context.asce_limit <= _REGION3_SIZE)
+       if (mm_pmd_folded(mm))
                return _SEGMENT_ENTRY_EMPTY;
-       if (mm->context.asce_limit <= _REGION2_SIZE)
+       if (mm_pud_folded(mm))
                return _REGION3_ENTRY_EMPTY;
-       if (mm->context.asce_limit <= _REGION1_SIZE)
+       if (mm_p4d_folded(mm))
                return _REGION2_ENTRY_EMPTY;
        return _REGION1_ENTRY_EMPTY;
 }
index 411d435e7a7d2a5a8c650c812017d66f9738710a..063732414dfbb5076c431d13e694e239e878ebef 100644 (file)
@@ -493,6 +493,24 @@ static inline int is_module_addr(void *addr)
                                   _REGION_ENTRY_PROTECT | \
                                   _REGION_ENTRY_NOEXEC)
 
+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+       return mm->context.asce_limit <= _REGION1_SIZE;
+}
+#define mm_p4d_folded(mm) mm_p4d_folded(mm)
+
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+       return mm->context.asce_limit <= _REGION2_SIZE;
+}
+#define mm_pud_folded(mm) mm_pud_folded(mm)
+
+static inline bool mm_pmd_folded(struct mm_struct *mm)
+{
+       return mm->context.asce_limit <= _REGION3_SIZE;
+}
+#define mm_pmd_folded(mm) mm_pmd_folded(mm)
+
 static inline int mm_has_pgste(struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
index 302795c47c06c299b732ed73de7b057a71b3805c..81038ab357ce955682b713f0c4241611ba5f931f 100644 (file)
@@ -236,7 +236,7 @@ static inline unsigned long current_stack_pointer(void)
        return sp;
 }
 
-static __no_sanitize_address_or_inline unsigned short stap(void)
+static __no_kasan_or_inline unsigned short stap(void)
 {
        unsigned short cpu_address;
 
@@ -330,7 +330,7 @@ static inline void __load_psw(psw_t psw)
  * Set PSW mask to specified value, while leaving the
  * PSW addr pointing to the next instruction.
  */
-static __no_sanitize_address_or_inline void __load_psw_mask(unsigned long mask)
+static __no_kasan_or_inline void __load_psw_mask(unsigned long mask)
 {
        unsigned long addr;
        psw_t psw;
index 27248f42a03c4561a9e1481fbea205b3b866f928..ce4e17c9aad6fa266d306676df4e7cdc69eb7df0 100644 (file)
@@ -14,7 +14,7 @@
  * General size of kernel stacks
  */
 #ifdef CONFIG_KASAN
-#define THREAD_SIZE_ORDER 3
+#define THREAD_SIZE_ORDER 4
 #else
 #define THREAD_SIZE_ORDER 2
 #endif
index 457b7ba0fbb66de24fd82219e18a51ad2663221f..b31c779cf58176ad3bf91ee816053cbcf40b3476 100644 (file)
@@ -136,7 +136,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
                                unsigned long address)
 {
-       if (tlb->mm->context.asce_limit <= _REGION3_SIZE)
+       if (mm_pmd_folded(tlb->mm))
                return;
        pgtable_pmd_page_dtor(virt_to_page(pmd));
        tlb_remove_table(tlb, pmd);
@@ -152,7 +152,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
                                unsigned long address)
 {
-       if (tlb->mm->context.asce_limit <= _REGION1_SIZE)
+       if (mm_p4d_folded(tlb->mm))
                return;
        tlb_remove_table(tlb, p4d);
 }
@@ -167,7 +167,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
 static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
                                unsigned long address)
 {
-       if (tlb->mm->context.asce_limit <= _REGION2_SIZE)
+       if (mm_pud_folded(tlb->mm))
                return;
        tlb_remove_table(tlb, pud);
 }
index 724fba4d09d2df3a35c372224ddc944c9def3ace..39191a0feed1cdedd692e68826b3b19db581cbbb 100644 (file)
@@ -236,10 +236,10 @@ ENTRY(__switch_to)
        stmg    %r6,%r15,__SF_GPRS(%r15)        # store gprs of prev task
        lghi    %r4,__TASK_stack
        lghi    %r1,__TASK_thread
-       lg      %r5,0(%r4,%r3)                  # start of kernel stack of next
+       llill   %r5,STACK_INIT
        stg     %r15,__THREAD_ksp(%r1,%r2)      # store kernel stack of prev
-       lgr     %r15,%r5
-       aghi    %r15,STACK_INIT                 # end of kernel stack of next
+       lg      %r15,0(%r4,%r3)                 # start of kernel stack of next
+       agr     %r15,%r5                        # end of kernel stack of next
        stg     %r3,__LC_CURRENT                # store task struct of next
        stg     %r15,__LC_KERNEL_STACK          # store end of kernel stack
        lg      %r15,__THREAD_ksp(%r1,%r3)      # load kernel stack of next
index cc085e2d2ce9907690fbe0912dd301ab44e8171d..74091fd3101e9122943b9155572a1c46d2bf9858 100644 (file)
@@ -373,7 +373,7 @@ static int __hw_perf_event_init(struct perf_event *event)
                return -ENOENT;
 
        if (ev > PERF_CPUM_CF_MAX_CTR)
-               return -EINVAL;
+               return -ENOENT;
 
        /* Obtain the counter set to which the specified counter belongs */
        set = get_counter_set(ev);
index 7bf604ff50a1bd082024c85fb5d32e06cca9c4f8..bfabeb1889cc0cca5c6859cb36bbbeb15b662049 100644 (file)
@@ -1842,10 +1842,30 @@ static void cpumsf_pmu_del(struct perf_event *event, int flags)
 CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
 CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
 
-static struct attribute *cpumsf_pmu_events_attr[] = {
-       CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
-       NULL,
-       NULL,
+/* Attribute list for CPU_SF.
+ *
+ * The availablitiy depends on the CPU_MF sampling facility authorization
+ * for basic + diagnositic samples. This is determined at initialization
+ * time by the sampling facility device driver.
+ * If the authorization for basic samples is turned off, it should be
+ * also turned off for diagnostic sampling.
+ *
+ * During initialization of the device driver, check the authorization
+ * level for diagnostic sampling and installs the attribute
+ * file for diagnostic sampling if necessary.
+ *
+ * For now install a placeholder to reference all possible attributes:
+ * SF_CYCLES_BASIC and SF_CYCLES_BASIC_DIAG.
+ * Add another entry for the final NULL pointer.
+ */
+enum {
+       SF_CYCLES_BASIC_ATTR_IDX = 0,
+       SF_CYCLES_BASIC_DIAG_ATTR_IDX,
+       SF_CYCLES_ATTR_MAX
+};
+
+static struct attribute *cpumsf_pmu_events_attr[SF_CYCLES_ATTR_MAX + 1] = {
+       [SF_CYCLES_BASIC_ATTR_IDX] = CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC)
 };
 
 PMU_FORMAT_ATTR(event, "config:0-63");
@@ -2040,7 +2060,10 @@ static int __init init_cpum_sampling_pmu(void)
 
        if (si.ad) {
                sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
-               cpumsf_pmu_events_attr[1] =
+               /* Sampling of diagnostic data authorized,
+                * install event into attribute list of PMU device.
+                */
+               cpumsf_pmu_events_attr[SF_CYCLES_BASIC_DIAG_ATTR_IDX] =
                        CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG);
        }
 
index eb8aebea3ea7bd7a6967136b6cb9aee3e25473aa..e76309fbbcb3b6e23af21350f98f2b555502b978 100644 (file)
@@ -37,7 +37,7 @@ KASAN_SANITIZE := n
 $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
 
 # link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32)
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
        $(call if_changed,vdso32ld)
 
 # strip rule for the .so file
@@ -46,12 +46,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
        $(call if_changed,objcopy)
 
 # assembly rules for the .S files
-$(obj-vdso32): %.o: %.S
+$(obj-vdso32): %.o: %.S FORCE
        $(call if_changed_dep,vdso32as)
 
 # actual build commands
 quiet_cmd_vdso32ld = VDSO32L $@
-      cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+      cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@
 quiet_cmd_vdso32as = VDSO32A $@
       cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $<
 
index a22b2cf86eec985d7f3bf32da11f5f0c220c28e7..f849ac61c5da02ee8b764bc3c01fc44c16137e04 100644 (file)
@@ -37,7 +37,7 @@ KASAN_SANITIZE := n
 $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
 
 # link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64)
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE
        $(call if_changed,vdso64ld)
 
 # strip rule for the .so file
@@ -46,12 +46,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
        $(call if_changed,objcopy)
 
 # assembly rules for the .S files
-$(obj-vdso64): %.o: %.S
+$(obj-vdso64): %.o: %.S FORCE
        $(call if_changed_dep,vdso64as)
 
 # actual build commands
 quiet_cmd_vdso64ld = VDSO64L $@
-      cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+      cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@
 quiet_cmd_vdso64as = VDSO64A $@
       cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
 
index 21eb7407d51bac8e71f3743defba1f7de5291e3d..8429ab07971575394622444ea6be40eb85b37f62 100644 (file)
@@ -154,14 +154,14 @@ SECTIONS
         * uncompressed image info used by the decompressor
         * it should match struct vmlinux_info
         */
-       .vmlinux.info 0 : {
+       .vmlinux.info 0 (INFO) : {
                QUAD(_stext)                                    /* default_lma */
                QUAD(startup_continue)                          /* entry */
                QUAD(__bss_start - _stext)                      /* image_size */
                QUAD(__bss_stop - __bss_start)                  /* bss_size */
                QUAD(__boot_data_start)                         /* bootdata_off */
                QUAD(__boot_data_end - __boot_data_start)       /* bootdata_size */
-       }
+       } :NONE
 
        /* Debugging sections.  */
        STABS_DEBUG
index 76d89ee8b428837fc6c32f962d0104787caa29a3..814f26520aa2c2439de4e10ce52bf0476c8f2661 100644 (file)
@@ -101,6 +101,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
                        mm->context.asce_limit = _REGION1_SIZE;
                        mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
                                _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+                       mm_inc_nr_puds(mm);
                } else {
                        crst_table_init(table, _REGION1_ENTRY_EMPTY);
                        pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
index ae0d9e889534cd880f750845fb58d919080e9325..d31bde0870d894bdc2cd3a3006d966924d1d5c1c 100644 (file)
@@ -53,6 +53,7 @@ int __node_distance(int a, int b)
 {
        return mode->distance ? mode->distance(a, b) : 0;
 }
+EXPORT_SYMBOL(__node_distance);
 
 int numa_debug_enabled;
 
index 67b3e6b3ce5d7cf8b417d361c5bbaadce92cc1e0..47c871394ccb1602d59bca5a3459a7e088df98e0 100644 (file)
@@ -1849,16 +1849,12 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
 {
        u64 saved_fault_address = current_thread_info()->fault_address;
        u8 saved_fault_code = get_thread_fault_code();
-       mm_segment_t old_fs;
 
        perf_callchain_store(entry, regs->tpc);
 
        if (!current->mm)
                return;
 
-       old_fs = get_fs();
-       set_fs(USER_DS);
-
        flushw_user();
 
        pagefault_disable();
@@ -1870,7 +1866,6 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
 
        pagefault_enable();
 
-       set_fs(old_fs);
        set_thread_fault_code(saved_fault_code);
        current_thread_info()->fault_address = saved_fault_address;
 }
index bb68c805b891855e18af6397ce534f74d5550a4d..ff9389a1c9f3f68c5acaa32123d65c5fedbf9846 100644 (file)
@@ -47,9 +47,9 @@ sys_call_table32:
        .word sys_recvfrom, sys_setreuid16, sys_setregid16, sys_rename, compat_sys_truncate
 /*130*/        .word compat_sys_ftruncate, sys_flock, compat_sys_lstat64, sys_sendto, sys_shutdown
        .word sys_socketpair, sys_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64
-/*140*/        .word sys_sendfile64, sys_nis_syscall, compat_sys_futex, sys_gettid, compat_sys_getrlimit
+/*140*/        .word sys_sendfile64, sys_getpeername, compat_sys_futex, sys_gettid, compat_sys_getrlimit
        .word compat_sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write
-/*150*/        .word sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
+/*150*/        .word sys_getsockname, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
        .word compat_sys_fcntl64, sys_inotify_rm_watch, compat_sys_statfs, compat_sys_fstatfs, sys_oldumount
 /*160*/        .word compat_sys_sched_setaffinity, compat_sys_sched_getaffinity, sys_getdomainname, sys_setdomainname, sys_nis_syscall
        .word sys_quotactl, sys_set_tid_address, compat_sys_mount, compat_sys_ustat, sys_setxattr
index 74c002ddc0ce74868286b77f43dfa6885e6c3e70..28c40624bcb6f0e9b15030037d6f199b46c5fa0f 100644 (file)
@@ -1305,6 +1305,7 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
                io_req->fds[0] = dev->cow.fd;
        else
                io_req->fds[0] = dev->fd;
+       io_req->error = 0;
 
        if (req_op(req) == REQ_OP_FLUSH) {
                io_req->op = UBD_FLUSH;
@@ -1313,9 +1314,7 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
                io_req->cow_offset = -1;
                io_req->offset = off;
                io_req->length = bvec->bv_len;
-               io_req->error = 0;
                io_req->sector_mask = 0;
-
                io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE;
                io_req->offsets[0] = 0;
                io_req->offsets[1] = dev->cow.data_offset;
@@ -1341,11 +1340,14 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
 static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
                                 const struct blk_mq_queue_data *bd)
 {
+       struct ubd *ubd_dev = hctx->queue->queuedata;
        struct request *req = bd->rq;
        int ret = 0;
 
        blk_mq_start_request(req);
 
+       spin_lock_irq(&ubd_dev->lock);
+
        if (req_op(req) == REQ_OP_FLUSH) {
                ret = ubd_queue_one_vec(hctx, req, 0, NULL);
        } else {
@@ -1361,9 +1363,11 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
                }
        }
 out:
-       if (ret < 0) {
+       spin_unlock_irq(&ubd_dev->lock);
+
+       if (ret < 0)
                blk_mq_requeue_request(req, true);
-       }
+
        return BLK_STS_OK;
 }
 
index c51c989c19c08da99155d354cc11558c1cdb36d4..ba7e3464ee9235fe43f0edd66034d670b2fc4ffd 100644 (file)
@@ -129,6 +129,7 @@ config X86
        select HAVE_ARCH_PREL32_RELOCATIONS
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_THREAD_STRUCT_WHITELIST
+       select HAVE_ARCH_STACKLEAK
        select HAVE_ARCH_TRACEHOOK
        select HAVE_ARCH_TRANSPARENT_HUGEPAGE
        select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
index 8f0c4c9fc90433d83e6c82b7ba14d55370d3d267..51079fc9298fc0f1a255fca2d2702a4f8b14fb84 100644 (file)
@@ -113,7 +113,7 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
 {
        int err;
 
-       memset(&cpu.flags, 0, sizeof cpu.flags);
+       memset(&cpu.flags, 0, sizeof(cpu.flags));
        cpu.level = 3;
 
        if (has_eflag(X86_EFLAGS_AC))
index b25c53527a9409490ff4077ae3f12d96b53a669d..023bf1c3de8b7a1d1d5b22c9dd1c9312ed07ad70 100644 (file)
@@ -50,7 +50,7 @@ static void parse_earlyprintk(void)
        int pos = 0;
        int port = 0;
 
-       if (cmdline_find_option("earlyprintk", arg, sizeof arg) > 0) {
+       if (cmdline_find_option("earlyprintk", arg, sizeof(arg)) > 0) {
                char *e;
 
                if (!strncmp(arg, "serial", 6)) {
@@ -124,7 +124,7 @@ static void parse_console_uart8250(void)
         * console=uart8250,io,0x3f8,115200n8
         * need to make sure it is last one console !
         */
-       if (cmdline_find_option("console", optstr, sizeof optstr) <= 0)
+       if (cmdline_find_option("console", optstr, sizeof(optstr)) <= 0)
                return;
 
        options = optstr;
index 223e42527077d26c818d7e0ec7259d98717dbd61..6c176b6a42ad0c7f51baaeb55f6dff7d0fa533af 100644 (file)
@@ -76,7 +76,7 @@ static int get_edd_info(u8 devno, struct edd_info *ei)
 {
        struct biosregs ireg, oreg;
 
-       memset(ei, 0, sizeof *ei);
+       memset(ei, 0, sizeof(*ei));
 
        /* Check Extensions Present */
 
@@ -133,7 +133,7 @@ void query_edd(void)
        struct edd_info ei, *edp;
        u32 *mbrptr;
 
-       if (cmdline_find_option("edd", eddarg, sizeof eddarg) > 0) {
+       if (cmdline_find_option("edd", eddarg, sizeof(eddarg)) > 0) {
                if (!strcmp(eddarg, "skipmbr") || !strcmp(eddarg, "skip")) {
                        do_edd = 1;
                        do_mbr = 0;
@@ -166,7 +166,7 @@ void query_edd(void)
                 */
                if (!get_edd_info(devno, &ei)
                    && boot_params.eddbuf_entries < EDDMAXNR) {
-                       memcpy(edp, &ei, sizeof ei);
+                       memcpy(edp, &ei, sizeof(ei));
                        edp++;
                        boot_params.eddbuf_entries++;
                }
index 9bcea386db65e3bbce1db97a8b7254249fd89ed7..73532543d68924c40374f61c94453bb16e816e25 100644 (file)
@@ -36,8 +36,8 @@ static void copy_boot_params(void)
        const struct old_cmdline * const oldcmd =
                (const struct old_cmdline *)OLD_CL_ADDRESS;
 
-       BUILD_BUG_ON(sizeof boot_params != 4096);
-       memcpy(&boot_params.hdr, &hdr, sizeof hdr);
+       BUILD_BUG_ON(sizeof(boot_params) != 4096);
+       memcpy(&boot_params.hdr, &hdr, sizeof(hdr));
 
        if (!boot_params.hdr.cmd_line_ptr &&
            oldcmd->cl_magic == OLD_CL_MAGIC) {
index d9c28c87e4771ffadf68091800b50142417389ba..7df2b28207be6c589d2478d130c924ebdd0073d7 100644 (file)
@@ -26,7 +26,7 @@ static int detect_memory_e820(void)
 
        initregs(&ireg);
        ireg.ax  = 0xe820;
-       ireg.cx  = sizeof buf;
+       ireg.cx  = sizeof(buf);
        ireg.edx = SMAP;
        ireg.di  = (size_t)&buf;
 
index c0fb356a3092e55f9f70aba1eea1bd8db74d77ae..2fe3616ba16138e51db4d6db47a225456b1c0e13 100644 (file)
@@ -21,7 +21,7 @@
 
 void initregs(struct biosregs *reg)
 {
-       memset(reg, 0, sizeof *reg);
+       memset(reg, 0, sizeof(*reg));
        reg->eflags |= X86_EFLAGS_CF;
        reg->ds = ds();
        reg->es = ds();
index ba3e100654db0239622a3f23f5d9d64855ebffd0..3ecc11a9c44040153521cdbfeafc37714999f5bc 100644 (file)
@@ -62,7 +62,7 @@ static int vesa_probe(void)
                if (mode & ~0x1ff)
                        continue;
 
-               memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
+               memset(&vminfo, 0, sizeof(vminfo)); /* Just in case... */
 
                ireg.ax = 0x4f01;
                ireg.cx = mode;
@@ -109,7 +109,7 @@ static int vesa_set_mode(struct mode_info *mode)
        int is_graphic;
        u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA;
 
-       memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
+       memset(&vminfo, 0, sizeof(vminfo)); /* Just in case... */
 
        initregs(&ireg);
        ireg.ax = 0x4f01;
@@ -241,7 +241,7 @@ void vesa_store_edid(void)
        struct biosregs ireg, oreg;
 
        /* Apparently used as a nonsense token... */
-       memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info);
+       memset(&boot_params.edid_info, 0x13, sizeof(boot_params.edid_info));
 
        if (vginfo.version < 0x0200)
                return;         /* EDID requires VBE 2.0+ */
index 77780e386e9b224ef8ec5421af644abc65d1eebd..ac89b6624a4053b4eeea6b16d18ed5f083fba2d4 100644 (file)
@@ -115,7 +115,7 @@ static unsigned int get_entry(void)
                } else if ((key >= '0' && key <= '9') ||
                           (key >= 'A' && key <= 'Z') ||
                           (key >= 'a' && key <= 'z')) {
-                       if (len < sizeof entry_buf) {
+                       if (len < sizeof(entry_buf)) {
                                entry_buf[len++] = key;
                                putchar(key);
                        }
index 708b46a54578d8722fc1c9fa07e58d74f7ff49d8..25e5a6bda8c3a971609dff93919ccab27d6a3aa9 100644 (file)
@@ -329,8 +329,22 @@ For 32-bit we have the following conventions - kernel is built with
 
 #endif
 
+.macro STACKLEAK_ERASE_NOCLOBBER
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+       PUSH_AND_CLEAR_REGS
+       call stackleak_erase
+       POP_REGS
+#endif
+.endm
+
 #endif /* CONFIG_X86_64 */
 
+.macro STACKLEAK_ERASE
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+       call stackleak_erase
+#endif
+.endm
+
 /*
  * This does 'call enter_from_user_mode' unless we can avoid it based on
  * kernel config or using the static jump infrastructure.
index 687e47f8a796621d4effcac9a055965969a81dc2..d309f30cf7af84e67ac38910eff4256da9c25a11 100644 (file)
@@ -46,6 +46,8 @@
 #include <asm/frame.h>
 #include <asm/nospec-branch.h>
 
+#include "calling.h"
+
        .section .entry.text, "ax"
 
 /*
@@ -712,6 +714,7 @@ ENTRY(ret_from_fork)
        /* When we fork, we trace the syscall return in the child, too. */
        movl    %esp, %eax
        call    syscall_return_slowpath
+       STACKLEAK_ERASE
        jmp     restore_all
 
        /* kernel thread */
@@ -886,6 +889,8 @@ ENTRY(entry_SYSENTER_32)
        ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
                    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
 
+       STACKLEAK_ERASE
+
 /* Opportunistic SYSEXIT */
        TRACE_IRQS_ON                   /* User mode traces as IRQs on. */
 
@@ -997,6 +1002,8 @@ ENTRY(entry_INT80_32)
        call    do_int80_syscall_32
 .Lsyscall_32_done:
 
+       STACKLEAK_ERASE
+
 restore_all:
        TRACE_IRQS_IRET
        SWITCH_TO_ENTRY_STACK
index 4d7a2d9d44cfec5928b902cef1bca9bca29093a6..ce25d84023c021ce25f041cd81497500f20c3a60 100644 (file)
@@ -266,6 +266,8 @@ syscall_return_via_sysret:
         * We are on the trampoline stack.  All regs except RDI are live.
         * We can do future final exit work right here.
         */
+       STACKLEAK_ERASE_NOCLOBBER
+
        SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
 
        popq    %rdi
@@ -625,6 +627,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
         * We are on the trampoline stack.  All regs except RDI are live.
         * We can do future final exit work right here.
         */
+       STACKLEAK_ERASE_NOCLOBBER
 
        SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
 
index 7d0df78db727296d1c4451e3a930033669f47aa3..8eaf8952c408cd619124f9696b4888fae2f529ad 100644 (file)
@@ -261,6 +261,11 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
 
        /* Opportunistic SYSRET */
 sysret32_from_system_call:
+       /*
+        * We are not going to return to userspace from the trampoline
+        * stack. So let's erase the thread stack right now.
+        */
+       STACKLEAK_ERASE
        TRACE_IRQS_ON                   /* User mode traces as IRQs on. */
        movq    RBX(%rsp), %rbx         /* pt_regs->rbx */
        movq    RBP(%rsp), %rbp         /* pt_regs->rbp */
index 0fb8659b20d8d76fd974406873df8416359ab157..273c62e8154632f524e04e16630bcdb6c958b8c0 100644 (file)
@@ -4535,7 +4535,7 @@ __init int intel_pmu_init(void)
                }
        }
 
-       snprintf(pmu_name_str, sizeof pmu_name_str, "%s", name);
+       snprintf(pmu_name_str, sizeof(pmu_name_str), "%s", name);
 
        if (version >= 2 && extra_attr) {
                x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
index fab4df16a3c43737b27368e3b67926a767bfa663..22c4dfe6599230378a3c9ac2ac6816a352390060 100644 (file)
@@ -217,11 +217,18 @@ static inline bool in_x32_syscall(void)
        return false;
 }
 
-static inline bool in_compat_syscall(void)
+static inline bool in_32bit_syscall(void)
 {
        return in_ia32_syscall() || in_x32_syscall();
 }
+
+#ifdef CONFIG_COMPAT
+static inline bool in_compat_syscall(void)
+{
+       return in_32bit_syscall();
+}
 #define in_compat_syscall in_compat_syscall    /* override the generic impl */
+#endif
 
 struct compat_siginfo;
 int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
index 89a048c2faec7f8a818d1a461ccd7fa67eca0fd9..28c4a502b4197cce9ae968deb8ea2fe7797e8da4 100644 (file)
 #define X86_FEATURE_LA57               (16*32+16) /* 5-level page tables */
 #define X86_FEATURE_RDPID              (16*32+22) /* RDPID instruction */
 #define X86_FEATURE_CLDEMOTE           (16*32+25) /* CLDEMOTE instruction */
+#define X86_FEATURE_MOVDIRI            (16*32+27) /* MOVDIRI instruction */
+#define X86_FEATURE_MOVDIR64B          (16*32+28) /* MOVDIR64B instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV     (17*32+ 0) /* MCA overflow recovery support */
index c18ed65287d5eda607c009bf747fc67f81743dfc..cf350639e76d1312a9c75f0ab21dfb31e0014afe 100644 (file)
@@ -76,9 +76,7 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name
 #define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS 1
 static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
 {
-       if (in_compat_syscall())
-               return true;
-       return false;
+       return in_32bit_syscall();
 }
 #endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */
 #endif /* !COMPILE_OFFSETS */
index fba54ca23b2a9f8be320f81cf3cc17a8cf550a0e..26942ad63830407255afc9e6de77267056a97135 100644 (file)
@@ -361,7 +361,6 @@ extern struct paravirt_patch_template pv_ops;
        __visible extern const char start_##ops##_##name[], end_##ops##_##name[];       \
        asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name))
 
-unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
 unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
 unsigned paravirt_patch_default(u8 type, void *insnbuf,
                                unsigned long addr, unsigned len);
@@ -651,7 +650,6 @@ void paravirt_leave_lazy_mmu(void);
 void paravirt_flush_lazy_mmu(void);
 
 void _paravirt_nop(void);
-u32 _paravirt_ident_32(u32);
 u64 _paravirt_ident_64(u64);
 
 #define paravirt_nop   ((void *)_paravirt_nop)
index 323a313947e01a6bfc2cb19b2d47ca16f1b9d8d3..d760611cfc351d3850bd85ae2e124c3e17c9e765 100644 (file)
@@ -453,6 +453,12 @@ static inline void __native_flush_tlb_one_user(unsigned long addr)
  */
 static inline void __flush_tlb_all(void)
 {
+       /*
+        * This is to catch users who run with preemption enabled and, due to
+        * the PGE feature, would not trigger the warning in __native_flush_tlb().
+        */
+       VM_WARN_ON_ONCE(preemptible());
+
        if (boot_cpu_has(X86_FEATURE_PGE)) {
                __flush_tlb_global();
        } else {
index 123e669bf363d375820ba3ab2ce981f01aa4329e..790ce08e41f20f4b16a9c085204ea877d6db5e5e 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/mm.h>
 #include <linux/device.h>
 
-#include <linux/uaccess.h>
+#include <asm/extable.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
@@ -93,12 +93,39 @@ clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
  */
 static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val)
 {
-       return __put_user(val, (unsigned long __user *)addr);
+       int ret = 0;
+
+       asm volatile("1: mov %[val], %[ptr]\n"
+                    "2:\n"
+                    ".section .fixup, \"ax\"\n"
+                    "3: sub $1, %[ret]\n"
+                    "   jmp 2b\n"
+                    ".previous\n"
+                    _ASM_EXTABLE(1b, 3b)
+                    : [ret] "+r" (ret), [ptr] "=m" (*addr)
+                    : [val] "r" (val));
+
+       return ret;
 }
 
-static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val)
+static inline int xen_safe_read_ulong(const unsigned long *addr,
+                                     unsigned long *val)
 {
-       return __get_user(*val, (unsigned long __user *)addr);
+       int ret = 0;
+       unsigned long rval = ~0ul;
+
+       asm volatile("1: mov %[ptr], %[rval]\n"
+                    "2:\n"
+                    ".section .fixup, \"ax\"\n"
+                    "3: sub $1, %[ret]\n"
+                    "   jmp 2b\n"
+                    ".previous\n"
+                    _ASM_EXTABLE(1b, 3b)
+                    : [ret] "+r" (ret), [rval] "+r" (rval)
+                    : [ptr] "m" (*addr));
+       *val = rval;
+
+       return ret;
 }
 
 #ifdef CONFIG_XEN_PV
index cbbd57ae06ee2af4b1028c95462209a2c348306f..ffb181f959d2b221759b6c85deded0930f2ff3cc 100644 (file)
@@ -1074,7 +1074,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 #endif
        c->x86_cache_alignment = c->x86_clflush_size;
 
-       memset(&c->x86_capability, 0, sizeof c->x86_capability);
+       memset(&c->x86_capability, 0, sizeof(c->x86_capability));
        c->extended_cpuid_level = 0;
 
        if (!have_cpuid_p())
@@ -1317,7 +1317,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
        c->x86_virt_bits = 32;
 #endif
        c->x86_cache_alignment = c->x86_clflush_size;
-       memset(&c->x86_capability, 0, sizeof c->x86_capability);
+       memset(&c->x86_capability, 0, sizeof(c->x86_capability));
 
        generic_identify(c);
 
index 8cb3c02980cfa72f9d6c810f84f080565c296400..8c66d2fc8f81dd1d2404f22e5a06ac696ee8fad9 100644 (file)
@@ -2215,7 +2215,7 @@ static int mce_device_create(unsigned int cpu)
        if (dev)
                return 0;
 
-       dev = kzalloc(sizeof *dev, GFP_KERNEL);
+       dev = kzalloc(sizeof(*dev), GFP_KERNEL);
        if (!dev)
                return -ENOMEM;
        dev->id  = cpu;
index b9bc8a1a584e39590e7beecdafb47773015794f7..2637ff09d6a0da6b7e25cd3942ecb05650fd51e5 100644 (file)
@@ -666,8 +666,8 @@ static ssize_t pf_show(struct device *dev,
 }
 
 static DEVICE_ATTR_WO(reload);
-static DEVICE_ATTR(version, 0400, version_show, NULL);
-static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL);
+static DEVICE_ATTR(version, 0444, version_show, NULL);
+static DEVICE_ATTR(processor_flags, 0444, pf_show, NULL);
 
 static struct attribute *mc_default_attrs[] = {
        &dev_attr_version.attr,
index e12ee86906c6250faa05b13ae9bb9c3dc545558f..86e277f8daf420b1d6b1f82992cc536776a50865 100644 (file)
@@ -798,7 +798,7 @@ static void generic_set_all(void)
        local_irq_restore(flags);
 
        /* Use the atomic bitops to update the global mask */
-       for (count = 0; count < sizeof mask * 8; ++count) {
+       for (count = 0; count < sizeof(mask) * 8; ++count) {
                if (mask & 0x01)
                        set_bit(count, &smp_changes_mask);
                mask >>= 1;
index 40eee6cc412484470daba013f2a197439163707a..2e173d47b450d4febbb9e2028f153bc91382b915 100644 (file)
@@ -174,12 +174,12 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
        case MTRRIOC_SET_PAGE_ENTRY:
        case MTRRIOC_DEL_PAGE_ENTRY:
        case MTRRIOC_KILL_PAGE_ENTRY:
-               if (copy_from_user(&sentry, arg, sizeof sentry))
+               if (copy_from_user(&sentry, arg, sizeof(sentry)))
                        return -EFAULT;
                break;
        case MTRRIOC_GET_ENTRY:
        case MTRRIOC_GET_PAGE_ENTRY:
-               if (copy_from_user(&gentry, arg, sizeof gentry))
+               if (copy_from_user(&gentry, arg, sizeof(gentry)))
                        return -EFAULT;
                break;
 #ifdef CONFIG_COMPAT
@@ -332,7 +332,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
        switch (cmd) {
        case MTRRIOC_GET_ENTRY:
        case MTRRIOC_GET_PAGE_ENTRY:
-               if (copy_to_user(arg, &gentry, sizeof gentry))
+               if (copy_to_user(arg, &gentry, sizeof(gentry)))
                        err = -EFAULT;
                break;
 #ifdef CONFIG_COMPAT
index 5e801c8c8ce7cfaf191505fc9feaae5414f7270b..374a52fa529694f7399ad59e0b4ec2c1d598c636 100644 (file)
@@ -213,8 +213,9 @@ static unsigned int mem32_serial_in(unsigned long addr, int offset)
  * early_pci_serial_init()
  *
  * This function is invoked when the early_printk param starts with "pciserial"
- * The rest of the param should be ",B:D.F,baud" where B, D & F describe the
- * location of a PCI device that must be a UART device.
+ * The rest of the param should be "[force],B:D.F,baud", where B, D & F describe
+ * the location of a PCI device that must be a UART device. "force" is optional
+ * and overrides the use of a UART device with a wrong PCI class code.
  */
 static __init void early_pci_serial_init(char *s)
 {
@@ -224,17 +225,23 @@ static __init void early_pci_serial_init(char *s)
        u32 classcode, bar0;
        u16 cmdreg;
        char *e;
+       int force = 0;
 
-
-       /*
-        * First, part the param to get the BDF values
-        */
        if (*s == ',')
                ++s;
 
        if (*s == 0)
                return;
 
+       /* Force the use of a UART device with a wrong class code */
+       if (!strncmp(s, "force,", 6)) {
+               force = 1;
+               s += 6;
+       }
+
+       /*
+        * Parse the param to get the BDF values
+        */
        bus = (u8)simple_strtoul(s, &e, 16);
        s = e;
        if (*s != ':')
@@ -253,7 +260,7 @@ static __init void early_pci_serial_init(char *s)
                s++;
 
        /*
-        * Second, find the device from the BDF
+        * Find the device from the BDF
         */
        cmdreg = read_pci_config(bus, slot, func, PCI_COMMAND);
        classcode = read_pci_config(bus, slot, func, PCI_CLASS_REVISION);
@@ -264,8 +271,10 @@ static __init void early_pci_serial_init(char *s)
         */
        if (((classcode >> 16 != PCI_CLASS_COMMUNICATION_MODEM) &&
             (classcode >> 16 != PCI_CLASS_COMMUNICATION_SERIAL)) ||
-          (((classcode >> 8) & 0xff) != 0x02)) /* 16550 I/F at BAR0 */
-               return;
+          (((classcode >> 8) & 0xff) != 0x02)) /* 16550 I/F at BAR0 */ {
+               if (!force)
+                       return;
+       }
 
        /*
         * Determine if it is IO or memory mapped
@@ -289,7 +298,7 @@ static __init void early_pci_serial_init(char *s)
        }
 
        /*
-        * Lastly, initialize the hardware
+        * Initialize the hardware
         */
        if (*s) {
                if (strcmp(s, "nocfg") == 0)
index 5dc377dc9d7b5a25027e2d6358696bff2f56d635..7663a8eb602bcf8332a04c91508dc84710504701 100644 (file)
@@ -385,7 +385,7 @@ static void __init copy_bootdata(char *real_mode_data)
         */
        sme_map_bootdata(real_mode_data);
 
-       memcpy(&boot_params, real_mode_data, sizeof boot_params);
+       memcpy(&boot_params, real_mode_data, sizeof(boot_params));
        sanitize_boot_params(&boot_params);
        cmd_line_ptr = get_cmd_line_ptr();
        if (cmd_line_ptr) {
index ef688804f80d33088fef15448996a97f69e2b193..4588414e2561ccc9d0d883c01fc400f2e9bdd1bf 100644 (file)
@@ -115,14 +115,14 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
                        err = -EBADF;
                        break;
                }
-               if (copy_from_user(&regs, uregs, sizeof regs)) {
+               if (copy_from_user(&regs, uregs, sizeof(regs))) {
                        err = -EFAULT;
                        break;
                }
                err = rdmsr_safe_regs_on_cpu(cpu, regs);
                if (err)
                        break;
-               if (copy_to_user(uregs, &regs, sizeof regs))
+               if (copy_to_user(uregs, &regs, sizeof(regs)))
                        err = -EFAULT;
                break;
 
@@ -131,14 +131,14 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
                        err = -EBADF;
                        break;
                }
-               if (copy_from_user(&regs, uregs, sizeof regs)) {
+               if (copy_from_user(&regs, uregs, sizeof(regs))) {
                        err = -EFAULT;
                        break;
                }
                err = wrmsr_safe_regs_on_cpu(cpu, regs);
                if (err)
                        break;
-               if (copy_to_user(uregs, &regs, sizeof regs))
+               if (copy_to_user(uregs, &regs, sizeof(regs)))
                        err = -EFAULT;
                break;
 
index e4d4df37922a3c7eb91adc67903d1a2bf59ba032..c0e0101133f352ba6a8ac8369eef15a3e5301be3 100644 (file)
@@ -56,17 +56,6 @@ asm (".pushsection .entry.text, \"ax\"\n"
      ".type _paravirt_nop, @function\n\t"
      ".popsection");
 
-/* identity function, which can be inlined */
-u32 notrace _paravirt_ident_32(u32 x)
-{
-       return x;
-}
-
-u64 notrace _paravirt_ident_64(u64 x)
-{
-       return x;
-}
-
 void __init default_banner(void)
 {
        printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
@@ -102,6 +91,12 @@ static unsigned paravirt_patch_call(void *insnbuf, const void *target,
 }
 
 #ifdef CONFIG_PARAVIRT_XXL
+/* identity function, which can be inlined */
+u64 notrace _paravirt_ident_64(u64 x)
+{
+       return x;
+}
+
 static unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
                                   unsigned long addr, unsigned len)
 {
@@ -146,13 +141,11 @@ unsigned paravirt_patch_default(u8 type, void *insnbuf,
        else if (opfunc == _paravirt_nop)
                ret = 0;
 
+#ifdef CONFIG_PARAVIRT_XXL
        /* identity functions just return their single argument */
-       else if (opfunc == _paravirt_ident_32)
-               ret = paravirt_patch_ident_32(insnbuf, len);
        else if (opfunc == _paravirt_ident_64)
                ret = paravirt_patch_ident_64(insnbuf, len);
 
-#ifdef CONFIG_PARAVIRT_XXL
        else if (type == PARAVIRT_PATCH(cpu.iret) ||
                 type == PARAVIRT_PATCH(cpu.usergs_sysret64))
                /* If operation requires a jmp, then jmp */
@@ -309,13 +302,8 @@ struct pv_info pv_info = {
 #endif
 };
 
-#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
-/* 32-bit pagetable entries */
-#define PTE_IDENT      __PV_IS_CALLEE_SAVE(_paravirt_ident_32)
-#else
 /* 64-bit pagetable entries */
 #define PTE_IDENT      __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
-#endif
 
 struct paravirt_patch_template pv_ops = {
        /* Init ops. */
@@ -483,5 +471,5 @@ NOKPROBE_SYMBOL(native_set_debugreg);
 NOKPROBE_SYMBOL(native_load_idt);
 #endif
 
-EXPORT_SYMBOL_GPL(pv_ops);
+EXPORT_SYMBOL(pv_ops);
 EXPORT_SYMBOL_GPL(pv_info);
index 6368c22fa1fa3b438627d39a63cd738106cb6074..de138d3912e45972b432832793a015b39886c2bc 100644 (file)
@@ -10,24 +10,18 @@ DEF_NATIVE(cpu, iret, "iret");
 DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
 DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
 DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
-#endif
-
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
-DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-#endif
-
-unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
-{
-       /* arg in %eax, return in %eax */
-       return 0;
-}
 
 unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
 {
        /* arg in %edx:%eax, return in %edx:%eax */
        return 0;
 }
+#endif
+
+#if defined(CONFIG_PARAVIRT_SPINLOCKS)
+DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
+DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
+#endif
 
 extern bool pv_is_native_spin_unlock(void);
 extern bool pv_is_native_vcpu_is_preempted(void);
index 7ca9cb726f4d669e110f7de0ba0086f5e6f0526d..9d9e04b310773789f8894a4900f6e1c6d0ab2e6d 100644 (file)
@@ -15,27 +15,19 @@ DEF_NATIVE(cpu, wbinvd, "wbinvd");
 
 DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq");
 DEF_NATIVE(cpu, swapgs, "swapgs");
-#endif
-
-DEF_NATIVE(, mov32, "mov %edi, %eax");
 DEF_NATIVE(, mov64, "mov %rdi, %rax");
 
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
-DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-#endif
-
-unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
-{
-       return paravirt_patch_insns(insnbuf, len,
-                                   start__mov32, end__mov32);
-}
-
 unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
 {
        return paravirt_patch_insns(insnbuf, len,
                                    start__mov64, end__mov64);
 }
+#endif
+
+#if defined(CONFIG_PARAVIRT_SPINLOCKS)
+DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
+DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
+#endif
 
 extern bool pv_is_native_spin_unlock(void);
 extern bool pv_is_native_vcpu_is_preempted(void);
index 31b4755369f084575f6b3a0ec30b340392106f70..0e0b4288a4b2b5ba4e8273038268fc9f777ca010 100644 (file)
@@ -701,10 +701,10 @@ static void __set_personality_x32(void)
                current->mm->context.ia32_compat = TIF_X32;
        current->personality &= ~READ_IMPLIES_EXEC;
        /*
-        * in_compat_syscall() uses the presence of the x32 syscall bit
+        * in_32bit_syscall() uses the presence of the x32 syscall bit
         * flag to determine compat status.  The x86 mmap() code relies on
         * the syscall bitness so set x32 syscall bit right here to make
-        * in_compat_syscall() work during exec().
+        * in_32bit_syscall() work during exec().
         *
         * Pretend to come from a x32 execve.
         */
index 6a78d4b36a7974ad4d4b4d75500c566529e4d227..f7476ce23b6e0f03cea661324f21f7a89e31295f 100644 (file)
@@ -105,7 +105,7 @@ out:
 static void find_start_end(unsigned long addr, unsigned long flags,
                unsigned long *begin, unsigned long *end)
 {
-       if (!in_compat_syscall() && (flags & MAP_32BIT)) {
+       if (!in_32bit_syscall() && (flags & MAP_32BIT)) {
                /* This is usually used needed to map code in small
                   model, so it needs to be in the first 31bit. Limit
                   it to that.  This means we need to move the
@@ -122,7 +122,7 @@ static void find_start_end(unsigned long addr, unsigned long flags,
        }
 
        *begin  = get_mmap_base(1);
-       if (in_compat_syscall())
+       if (in_32bit_syscall())
                *end = task_size_32bit();
        else
                *end = task_size_64bit(addr > DEFAULT_MAP_WINDOW);
@@ -193,7 +193,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                return addr;
 
        /* for MAP_32BIT mappings we force the legacy mmap base */
-       if (!in_compat_syscall() && (flags & MAP_32BIT))
+       if (!in_32bit_syscall() && (flags & MAP_32BIT))
                goto bottomup;
 
        /* requesting a specific address */
@@ -217,9 +217,10 @@ get_unmapped_area:
         * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
         * in the full address space.
         *
-        * !in_compat_syscall() check to avoid high addresses for x32.
+        * !in_32bit_syscall() check to avoid high addresses for x32
+        * (and make it no op on native i386).
         */
-       if (addr > DEFAULT_MAP_WINDOW && !in_compat_syscall())
+       if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
                info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
 
        info.align_mask = 0;
index 8f6dcd88202e89d3c7630d034f6d044a712a9fb1..9b7c4ca8f0a7358ce19741de1c875a75e8f52f8e 100644 (file)
@@ -306,7 +306,7 @@ __visible void __noreturn handle_stack_overflow(const char *message,
        die(message, regs, 0);
 
        /* Be absolutely certain we don't return. */
-       panic(message);
+       panic("%s", message);
 }
 #endif
 
index 34edf198708f76883d5fa099bfe3683dbf52c753..78e430f4e15cfa3b745882aa5de21fdc14f3277d 100644 (file)
@@ -1509,7 +1509,7 @@ static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
                return emulate_gp(ctxt, index << 3 | 0x2);
 
        addr = dt.address + index * 8;
-       return linear_read_system(ctxt, addr, desc, sizeof *desc);
+       return linear_read_system(ctxt, addr, desc, sizeof(*desc));
 }
 
 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
@@ -1522,7 +1522,7 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
                struct desc_struct desc;
                u16 sel;
 
-               memset (dt, 0, sizeof *dt);
+               memset(dt, 0, sizeof(*dt));
                if (!ops->get_segment(ctxt, &sel, &desc, &base3,
                                      VCPU_SREG_LDTR))
                        return;
@@ -1586,7 +1586,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
        if (rc != X86EMUL_CONTINUE)
                return rc;
 
-       return linear_write_system(ctxt, addr, desc, sizeof *desc);
+       return linear_write_system(ctxt, addr, desc, sizeof(*desc));
 }
 
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
@@ -1604,7 +1604,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
        u16 dummy;
        u32 base3 = 0;
 
-       memset(&seg_desc, 0, sizeof seg_desc);
+       memset(&seg_desc, 0, sizeof(seg_desc));
 
        if (ctxt->mode == X86EMUL_MODE_REAL) {
                /* set real mode segment descriptor (keep limit etc. for
@@ -3075,17 +3075,17 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
        int ret;
        u32 new_tss_base = get_desc_base(new_desc);
 
-       ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg);
+       ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
        if (ret != X86EMUL_CONTINUE)
                return ret;
 
        save_state_to_tss16(ctxt, &tss_seg);
 
-       ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg);
+       ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
        if (ret != X86EMUL_CONTINUE)
                return ret;
 
-       ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof tss_seg);
+       ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
        if (ret != X86EMUL_CONTINUE)
                return ret;
 
@@ -3094,7 +3094,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
 
                ret = linear_write_system(ctxt, new_tss_base,
                                          &tss_seg.prev_task_link,
-                                         sizeof tss_seg.prev_task_link);
+                                         sizeof(tss_seg.prev_task_link));
                if (ret != X86EMUL_CONTINUE)
                        return ret;
        }
@@ -3216,7 +3216,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
        u32 eip_offset = offsetof(struct tss_segment_32, eip);
        u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
 
-       ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg);
+       ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
        if (ret != X86EMUL_CONTINUE)
                return ret;
 
@@ -3228,7 +3228,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
        if (ret != X86EMUL_CONTINUE)
                return ret;
 
-       ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof tss_seg);
+       ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
        if (ret != X86EMUL_CONTINUE)
                return ret;
 
@@ -3237,7 +3237,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
 
                ret = linear_write_system(ctxt, new_tss_base,
                                          &tss_seg.prev_task_link,
-                                         sizeof tss_seg.prev_task_link);
+                                         sizeof(tss_seg.prev_task_link));
                if (ret != X86EMUL_CONTINUE)
                        return ret;
        }
index 3cd227ff807fadd27284e6bc81ef2c6f8a60f3d1..89db20f8cb707666494c906bafedf958508ed12d 100644 (file)
@@ -2409,7 +2409,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
        r = kvm_apic_state_fixup(vcpu, s, true);
        if (r)
                return r;
-       memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
+       memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
 
        recalculate_apic_map(vcpu->kvm);
        kvm_apic_set_version(vcpu);
index 66d66d77caee5c7b761ae4eda2f0caf4643987e1..5cd5647120f2b439a8d63b52d223499b48e5ff9e 100644 (file)
@@ -2924,7 +2924,7 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
        unsigned size;
 
        r = -EFAULT;
-       if (copy_from_user(&msrs, user_msrs, sizeof msrs))
+       if (copy_from_user(&msrs, user_msrs, sizeof(msrs)))
                goto out;
 
        r = -E2BIG;
@@ -3091,11 +3091,11 @@ long kvm_arch_dev_ioctl(struct file *filp,
                unsigned n;
 
                r = -EFAULT;
-               if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
+               if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
                        goto out;
                n = msr_list.nmsrs;
                msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs;
-               if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
+               if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
                        goto out;
                r = -E2BIG;
                if (n < msr_list.nmsrs)
@@ -3117,7 +3117,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
                struct kvm_cpuid2 cpuid;
 
                r = -EFAULT;
-               if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
+               if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
                        goto out;
 
                r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
@@ -3126,7 +3126,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
                        goto out;
 
                r = -EFAULT;
-               if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
+               if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
                        goto out;
                r = 0;
                break;
@@ -3894,7 +3894,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                struct kvm_interrupt irq;
 
                r = -EFAULT;
-               if (copy_from_user(&irq, argp, sizeof irq))
+               if (copy_from_user(&irq, argp, sizeof(irq)))
                        goto out;
                r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
                break;
@@ -3912,7 +3912,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                struct kvm_cpuid cpuid;
 
                r = -EFAULT;
-               if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
+               if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
                        goto out;
                r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
                break;
@@ -3922,7 +3922,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                struct kvm_cpuid2 cpuid;
 
                r = -EFAULT;
-               if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
+               if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
                        goto out;
                r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
                                              cpuid_arg->entries);
@@ -3933,14 +3933,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                struct kvm_cpuid2 cpuid;
 
                r = -EFAULT;
-               if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
+               if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
                        goto out;
                r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
                                              cpuid_arg->entries);
                if (r)
                        goto out;
                r = -EFAULT;
-               if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
+               if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
                        goto out;
                r = 0;
                break;
@@ -3961,13 +3961,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                struct kvm_tpr_access_ctl tac;
 
                r = -EFAULT;
-               if (copy_from_user(&tac, argp, sizeof tac))
+               if (copy_from_user(&tac, argp, sizeof(tac)))
                        goto out;
                r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
                if (r)
                        goto out;
                r = -EFAULT;
-               if (copy_to_user(argp, &tac, sizeof tac))
+               if (copy_to_user(argp, &tac, sizeof(tac)))
                        goto out;
                r = 0;
                break;
@@ -3980,7 +3980,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                if (!lapic_in_kernel(vcpu))
                        goto out;
                r = -EFAULT;
-               if (copy_from_user(&va, argp, sizeof va))
+               if (copy_from_user(&va, argp, sizeof(va)))
                        goto out;
                idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
@@ -3991,7 +3991,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                u64 mcg_cap;
 
                r = -EFAULT;
-               if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
+               if (copy_from_user(&mcg_cap, argp, sizeof(mcg_cap)))
                        goto out;
                r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
                break;
@@ -4000,7 +4000,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                struct kvm_x86_mce mce;
 
                r = -EFAULT;
-               if (copy_from_user(&mce, argp, sizeof mce))
+               if (copy_from_user(&mce, argp, sizeof(mce)))
                        goto out;
                r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
                break;
@@ -4536,7 +4536,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
                if (kvm->created_vcpus)
                        goto set_identity_unlock;
                r = -EFAULT;
-               if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
+               if (copy_from_user(&ident_addr, argp, sizeof(ident_addr)))
                        goto set_identity_unlock;
                r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
 set_identity_unlock:
@@ -4620,7 +4620,7 @@ set_identity_unlock:
                if (r)
                        goto get_irqchip_out;
                r = -EFAULT;
-               if (copy_to_user(argp, chip, sizeof *chip))
+               if (copy_to_user(argp, chip, sizeof(*chip)))
                        goto get_irqchip_out;
                r = 0;
        get_irqchip_out:
@@ -4666,7 +4666,7 @@ set_identity_unlock:
        }
        case KVM_SET_PIT: {
                r = -EFAULT;
-               if (copy_from_user(&u.ps, argp, sizeof u.ps))
+               if (copy_from_user(&u.ps, argp, sizeof(u.ps)))
                        goto out;
                r = -ENXIO;
                if (!kvm->arch.vpit)
@@ -8205,7 +8205,7 @@ static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
        sregs->efer = vcpu->arch.efer;
        sregs->apic_base = kvm_get_apic_base(vcpu);
 
-       memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
+       memset(sregs->interrupt_bitmap, 0, sizeof(sregs->interrupt_bitmap));
 
        if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft)
                set_bit(vcpu->arch.interrupt.nr,
@@ -8509,7 +8509,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
        fpu->last_opcode = fxsave->fop;
        fpu->last_ip = fxsave->rip;
        fpu->last_dp = fxsave->rdp;
-       memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
+       memcpy(fpu->xmm, fxsave->xmm_space, sizeof(fxsave->xmm_space));
 
        vcpu_put(vcpu);
        return 0;
@@ -8530,7 +8530,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
        fxsave->fop = fpu->last_opcode;
        fxsave->rip = fpu->last_ip;
        fxsave->rdp = fpu->last_dp;
-       memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
+       memcpy(fxsave->xmm_space, fpu->xmm, sizeof(fxsave->xmm_space));
 
        vcpu_put(vcpu);
        return 0;
index 00b296617ca436c3cea79edcbb0a94d034ee52a3..92e4c4b85bbaadec13e54fb614e8a5dc369e2ff5 100644 (file)
@@ -92,7 +92,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
         * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
         * in the full address space.
         */
-       info.high_limit = in_compat_syscall() ?
+       info.high_limit = in_32bit_syscall() ?
                task_size_32bit() : task_size_64bit(addr > DEFAULT_MAP_WINDOW);
 
        info.align_mask = PAGE_MASK & ~huge_page_mask(h);
@@ -116,7 +116,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
         * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
         * in the full address space.
         */
-       if (addr > DEFAULT_MAP_WINDOW && !in_compat_syscall())
+       if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
                info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
 
        info.align_mask = PAGE_MASK & ~huge_page_mask(h);
index 1e95d57760cf79becf81c012df564e42ae7122e2..db316571452145f50832ba56ff7fb49214d4ae02 100644 (file)
@@ -166,7 +166,7 @@ unsigned long get_mmap_base(int is_legacy)
        struct mm_struct *mm = current->mm;
 
 #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
-       if (in_compat_syscall()) {
+       if (in_32bit_syscall()) {
                return is_legacy ? mm->mmap_compat_legacy_base
                                 : mm->mmap_compat_base;
        }
index a80fdd7fb40f3b25791b2ab816ef068eb2f09170..abffa0be80da17b7996a578f8d22af14dfebe326 100644 (file)
@@ -399,9 +399,17 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
                n = simple_strtoul(emu_cmdline, &emu_cmdline, 0);
                ret = -1;
                for_each_node_mask(i, physnode_mask) {
+                       /*
+                        * The reason we always pass in blk[0] is that
+                        * numa_remove_memblk_from(), called by
+                        * emu_setup_memblk(), deletes entry 0
+                        * and then moves everything else up in the pi.blk
+                        * array. Therefore we should always be looking
+                        * at blk[0].
+                        */
                        ret = split_nodes_size_interleave_uniform(&ei, &pi,
-                                       pi.blk[i].start, pi.blk[i].end, 0,
-                                       n, &pi.blk[i], nid);
+                                       pi.blk[0].start, pi.blk[0].end, 0,
+                                       n, &pi.blk[0], nid);
                        if (ret < 0)
                                break;
                        if (ret < n) {
index f799076e3d577065e8c73c12c6cddb0c5d6f6cfe..db7a1008223886d398c531d8d34720cd0265d17d 100644 (file)
@@ -2309,9 +2309,13 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
 
        /*
         * We should perform an IPI and flush all tlbs,
-        * but that can deadlock->flush only current cpu:
+        * but that can deadlock->flush only current cpu.
+        * Preemption needs to be disabled around __flush_tlb_all() due to
+        * CR3 reload in __native_flush_tlb().
         */
+       preempt_disable();
        __flush_tlb_all();
+       preempt_enable();
 
        arch_flush_lazy_mmu_mode();
 }
index 0b08067c45f3daa6a62b71c1cc9b4663f2c003a1..b629f6992d9f65dd6ec587ece3e808843ffd748f 100644 (file)
@@ -130,7 +130,7 @@ static void regex_init(int use_real_mode)
                              REG_EXTENDED|REG_NOSUB);
 
                if (err) {
-                       regerror(err, &sym_regex_c[i], errbuf, sizeof errbuf);
+                       regerror(err, &sym_regex_c[i], errbuf, sizeof(errbuf));
                        die("%s", errbuf);
                }
         }
@@ -405,7 +405,7 @@ static void read_shdrs(FILE *fp)
        }
        for (i = 0; i < ehdr.e_shnum; i++) {
                struct section *sec = &secs[i];
-               if (fread(&shdr, sizeof shdr, 1, fp) != 1)
+               if (fread(&shdr, sizeof(shdr), 1, fp) != 1)
                        die("Cannot read ELF section headers %d/%d: %s\n",
                            i, ehdr.e_shnum, strerror(errno));
                sec->shdr.sh_name      = elf_word_to_cpu(shdr.sh_name);
index 413f3519d9a12ea4f66c14720c92e918c306c345..c907b20d49935d5bb15b4f5bc602918d9b815341 100644 (file)
@@ -194,7 +194,7 @@ extern unsigned long um_vdso_addr;
 
 typedef unsigned long elf_greg_t;
 
-#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t))
+#define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t))
 typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 
 typedef struct user_i387_struct elf_fpregset_t;
index b06731705529b1e4c339bc21db8de0a565acf6d8..055e37e43541ed17d11cf4a194085b7fc3a3192c 100644 (file)
@@ -656,8 +656,7 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 
        /*
         * The interface requires atomic updates on p2m elements.
-        * xen_safe_write_ulong() is using __put_user which does an atomic
-        * store via asm().
+        * xen_safe_write_ulong() uses an atomic store via asm().
         */
        if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn)))
                return true;
index 441c8826216982a4fb9532b68d68a37cbb0d3e05..1c8a8816a402abd09bf199f57314daf61d9819d9 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/log2.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
+#include <linux/atomic.h>
 
 #include <asm/paravirt.h>
 #include <asm/qspinlock.h>
@@ -21,6 +22,7 @@
 
 static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
 static DEFINE_PER_CPU(char *, irq_name);
+static DEFINE_PER_CPU(atomic_t, xen_qlock_wait_nest);
 static bool xen_pvspin = true;
 
 static void xen_qlock_kick(int cpu)
@@ -39,25 +41,25 @@ static void xen_qlock_kick(int cpu)
  */
 static void xen_qlock_wait(u8 *byte, u8 val)
 {
-       unsigned long flags;
        int irq = __this_cpu_read(lock_kicker_irq);
+       atomic_t *nest_cnt = this_cpu_ptr(&xen_qlock_wait_nest);
 
        /* If kicker interrupts not initialized yet, just spin */
        if (irq == -1 || in_nmi())
                return;
 
-       /* Guard against reentry. */
-       local_irq_save(flags);
+       /* Detect reentry. */
+       atomic_inc(nest_cnt);
 
-       /* If irq pending already clear it. */
-       if (xen_test_irq_pending(irq)) {
+       /* If an irq is already pending and this is not a nested call, clear it. */
+       if (atomic_read(nest_cnt) == 1 && xen_test_irq_pending(irq)) {
                xen_clear_irq_pending(irq);
        } else if (READ_ONCE(*byte) == val) {
                /* Block until irq becomes pending (or a spurious wakeup) */
                xen_poll_irq(irq);
        }
 
-       local_irq_restore(flags);
+       atomic_dec(nest_cnt);
 }
 
 static irqreturn_t dummy_handler(int irq, void *dev_id)
index 60c141af222bc5e05c426e6c0edf976e93c036d0..d29b7365da8d9facd71a887a071b34bbb68ec5fc 100644 (file)
@@ -1,7 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-config ZONE_DMA
-       def_bool y
-
 config XTENSA
        def_bool y
        select ARCH_HAS_SG_CHAIN
index dc9e0ba7122cad1e982ac33eb5c9d60d9a1db48a..294846117fc2c5527e297ccd50eb55c31c3f3228 100644 (file)
@@ -33,7 +33,7 @@ uImage: $(obj)/uImage
 boot-elf boot-redboot: $(addprefix $(obj)/,$(subdir-y))
        $(Q)$(MAKE) $(build)=$(obj)/$@ $(MAKECMDGOALS)
 
-OBJCOPYFLAGS = --strip-all -R .comment -R .note.gnu.build-id -O binary
+OBJCOPYFLAGS = --strip-all -R .comment -R .notes -O binary
 
 vmlinux.bin: vmlinux FORCE
        $(call if_changed,objcopy)
index b727b18a68acd9ff639d9da228fc70d94dc657ff..b80a430453b1cb7d76b8598db8eb0b06f039df8d 100644 (file)
@@ -131,6 +131,7 @@ SECTIONS
   .fixup   : { *(.fixup) }
 
   EXCEPTION_TABLE(16)
+  NOTES
   /* Data section */
 
   _sdata = .;
@@ -296,38 +297,11 @@ SECTIONS
 
   _end = .;
 
-  .xt.lit : { *(.xt.lit) }
-  .xt.prop : { *(.xt.prop) }
-
-  .debug  0 :  { *(.debug) }
-  .line  0 :  { *(.line) }
-  .debug_srcinfo  0 :  { *(.debug_srcinfo) }
-  .debug_sfnames  0 :  { *(.debug_sfnames) }
-  .debug_aranges  0 :  { *(.debug_aranges) }
-  .debug_pubnames  0 :  { *(.debug_pubnames) }
-  .debug_info  0 :  { *(.debug_info) }
-  .debug_abbrev  0 :  { *(.debug_abbrev) }
-  .debug_line  0 :  { *(.debug_line) }
-  .debug_frame  0 :  { *(.debug_frame) }
-  .debug_str  0 :  { *(.debug_str) }
-  .debug_loc  0 :  { *(.debug_loc) }
-  .debug_macinfo  0 :  { *(.debug_macinfo) }
-  .debug_weaknames  0 :  { *(.debug_weaknames) }
-  .debug_funcnames  0 :  { *(.debug_funcnames) }
-  .debug_typenames  0 :  { *(.debug_typenames) }
-  .debug_varnames  0 :  { *(.debug_varnames) }
-
-  .xt.insn 0 :
-  {
-    *(.xt.insn)
-    *(.gnu.linkonce.x*)
-  }
+  DWARF_DEBUG
 
-  .xt.lit 0 :
-  {
-    *(.xt.lit)
-    *(.gnu.linkonce.p*)
-  }
+  .xt.prop 0 : { KEEP(*(.xt.prop .xt.prop.* .gnu.linkonce.prop.*)) }
+  .xt.insn 0 : { KEEP(*(.xt.insn .xt.insn.* .gnu.linkonce.x*)) }
+  .xt.lit  0 : { KEEP(*(.xt.lit  .xt.lit.*  .gnu.linkonce.p*)) }
 
   /* Sections to be discarded */
   DISCARDS
index 9750a48f491b19c087b8b1013b4614eaf61e4cac..30a48bba4a47372b81ce5f0e6e40aec8ca564dcd 100644 (file)
@@ -71,7 +71,7 @@ void __init zones_init(void)
 {
        /* All pages are DMA-able, so we put them all in the DMA zone. */
        unsigned long zones_size[MAX_NR_ZONES] = {
-               [ZONE_DMA] = max_low_pfn - ARCH_PFN_OFFSET,
+               [ZONE_NORMAL] = max_low_pfn - ARCH_PFN_OFFSET,
 #ifdef CONFIG_HIGHMEM
                [ZONE_HIGHMEM] = max_pfn - max_low_pfn,
 #endif
index d9a7916ff0ab6474a6f4abac2873a6685ad4d467..9fe5952d117d553f12f32055fde8683c554b06a8 100644 (file)
@@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
        uint64_t serial_nr;
 
        rcu_read_lock();
-       serial_nr = __bio_blkcg(bio)->css.serial_nr;
+       serial_nr = bio_blkcg(bio)->css.serial_nr;
 
        /*
         * Check whether blkcg has changed.  The condition may trigger
@@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
        if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
                goto out;
 
-       bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
+       bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
        /*
         * Update blkg_path for bfq_log_* functions. We cache this
         * path, and update it here, for the following
index 6075100f03a50a73da838b19891b923d0ad422a7..3a27d31fcda60250854ced98c68fe3db32ac587c 100644 (file)
@@ -4384,7 +4384,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
 
        rcu_read_lock();
 
-       bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
+       bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio));
        if (!bfqg) {
                bfqq = &bfqd->oom_bfqq;
                goto out;
index bbfeb4ee2892fcbd9d51de450c41fab7dc466ce5..a50d59236b1976439e691bc0f22da002eedfd0a9 100644 (file)
@@ -609,9 +609,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
        bio->bi_iter = bio_src->bi_iter;
        bio->bi_io_vec = bio_src->bi_io_vec;
 
-       bio_clone_blkg_association(bio, bio_src);
-
-       blkcg_bio_issue_init(bio);
+       bio_clone_blkcg_association(bio, bio_src);
 }
 EXPORT_SYMBOL(__bio_clone_fast);
 
@@ -1256,12 +1254,13 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
        /*
         * success
         */
-       if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) ||
+       if ((iov_iter_rw(iter) == WRITE && (!map_data || !map_data->null_mapped)) ||
            (map_data && map_data->from_user)) {
                ret = bio_copy_from_iter(bio, iter);
                if (ret)
                        goto cleanup;
        } else {
+               zero_fill_bio(bio);
                iov_iter_advance(iter, bio->bi_iter.bi_size);
        }
 
@@ -1956,151 +1955,69 @@ EXPORT_SYMBOL(bioset_init_from_src);
 
 #ifdef CONFIG_BLK_CGROUP
 
-/**
- * bio_associate_blkg - associate a bio with the a blkg
- * @bio: target bio
- * @blkg: the blkg to associate
- *
- * This tries to associate @bio with the specified blkg.  Association failure
- * is handled by walking up the blkg tree.  Therefore, the blkg associated can
- * be anything between @blkg and the root_blkg.  This situation only happens
- * when a cgroup is dying and then the remaining bios will spill to the closest
- * alive blkg.
- *
- * A reference will be taken on the @blkg and will be released when @bio is
- * freed.
- */
-int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
-{
-       if (unlikely(bio->bi_blkg))
-               return -EBUSY;
-       bio->bi_blkg = blkg_tryget_closest(blkg);
-       return 0;
-}
-
-/**
- * __bio_associate_blkg_from_css - internal blkg association function
- *
- * This in the core association function that all association paths rely on.
- * A blkg reference is taken which is released upon freeing of the bio.
- */
-static int __bio_associate_blkg_from_css(struct bio *bio,
-                                        struct cgroup_subsys_state *css)
-{
-       struct request_queue *q = bio->bi_disk->queue;
-       struct blkcg_gq *blkg;
-       int ret;
-
-       rcu_read_lock();
-
-       if (!css || !css->parent)
-               blkg = q->root_blkg;
-       else
-               blkg = blkg_lookup_create(css_to_blkcg(css), q);
-
-       ret = bio_associate_blkg(bio, blkg);
-
-       rcu_read_unlock();
-       return ret;
-}
-
-/**
- * bio_associate_blkg_from_css - associate a bio with a specified css
- * @bio: target bio
- * @css: target css
- *
- * Associate @bio with the blkg found by combining the css's blkg and the
- * request_queue of the @bio.  This falls back to the queue's root_blkg if
- * the association fails with the css.
- */
-int bio_associate_blkg_from_css(struct bio *bio,
-                               struct cgroup_subsys_state *css)
-{
-       if (unlikely(bio->bi_blkg))
-               return -EBUSY;
-       return __bio_associate_blkg_from_css(bio, css);
-}
-EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
-
 #ifdef CONFIG_MEMCG
 /**
- * bio_associate_blkg_from_page - associate a bio with the page's blkg
+ * bio_associate_blkcg_from_page - associate a bio with the page's blkcg
  * @bio: target bio
  * @page: the page to lookup the blkcg from
  *
- * Associate @bio with the blkg from @page's owning memcg and the respective
- * request_queue.  If cgroup_e_css returns NULL, fall back to the queue's
- * root_blkg.
- *
- * Note: this must be called after bio has an associated device.
+ * Associate @bio with the blkcg from @page's owning memcg.  This works like
+ * every other associate function wrt references.
  */
-int bio_associate_blkg_from_page(struct bio *bio, struct page *page)
+int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
 {
-       struct cgroup_subsys_state *css;
-       int ret;
+       struct cgroup_subsys_state *blkcg_css;
 
-       if (unlikely(bio->bi_blkg))
+       if (unlikely(bio->bi_css))
                return -EBUSY;
        if (!page->mem_cgroup)
                return 0;
-
-       rcu_read_lock();
-
-       css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
-
-       ret = __bio_associate_blkg_from_css(bio, css);
-
-       rcu_read_unlock();
-       return ret;
+       blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
+                                    &io_cgrp_subsys);
+       bio->bi_css = blkcg_css;
+       return 0;
 }
 #endif /* CONFIG_MEMCG */
 
 /**
- * bio_associate_create_blkg - associate a bio with a blkg from q
- * @q: request_queue where bio is going
+ * bio_associate_blkcg - associate a bio with the specified blkcg
  * @bio: target bio
+ * @blkcg_css: css of the blkcg to associate
+ *
+ * Associate @bio with the blkcg specified by @blkcg_css.  Block layer will
+ * treat @bio as if it were issued by a task which belongs to the blkcg.
  *
- * Associate @bio with the blkg found from the bio's css and the request_queue.
- * If one is not found, bio_lookup_blkg creates the blkg.  This falls back to
- * the queue's root_blkg if association fails.
+ * This function takes an extra reference of @blkcg_css which will be put
+ * when @bio is released.  The caller must own @bio and is responsible for
+ * synchronizing calls to this function.
  */
-int bio_associate_create_blkg(struct request_queue *q, struct bio *bio)
+int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
 {
-       struct cgroup_subsys_state *css;
-       int ret = 0;
-
-       /* someone has already associated this bio with a blkg */
-       if (bio->bi_blkg)
-               return ret;
-
-       rcu_read_lock();
-
-       css = blkcg_css();
-
-       ret = __bio_associate_blkg_from_css(bio, css);
-
-       rcu_read_unlock();
-       return ret;
+       if (unlikely(bio->bi_css))
+               return -EBUSY;
+       css_get(blkcg_css);
+       bio->bi_css = blkcg_css;
+       return 0;
 }
+EXPORT_SYMBOL_GPL(bio_associate_blkcg);
 
 /**
- * bio_reassociate_blkg - reassociate a bio with a blkg from q
- * @q: request_queue where bio is going
+ * bio_associate_blkg - associate a bio with the specified blkg
  * @bio: target bio
+ * @blkg: the blkg to associate
  *
- * When submitting a bio, multiple recursive calls to make_request() may occur.
- * This causes the initial associate done in blkcg_bio_issue_check() to be
- * incorrect and reference the prior request_queue.  This performs reassociation
- * when this situation happens.
+ * Associate @bio with the blkg specified by @blkg.  This is the queue-specific
+ * blkcg information associated with the @bio; a reference is taken on the
+ * @blkg and released when the bio is freed.
  */
-int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
+int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
 {
-       if (bio->bi_blkg) {
-               blkg_put(bio->bi_blkg);
-               bio->bi_blkg = NULL;
-       }
-
-       return bio_associate_create_blkg(q, bio);
+       if (unlikely(bio->bi_blkg))
+               return -EBUSY;
+       if (!blkg_try_get(blkg))
+               return -ENODEV;
+       bio->bi_blkg = blkg;
+       return 0;
 }
 
 /**
@@ -2113,6 +2030,10 @@ void bio_disassociate_task(struct bio *bio)
                put_io_context(bio->bi_ioc);
                bio->bi_ioc = NULL;
        }
+       if (bio->bi_css) {
+               css_put(bio->bi_css);
+               bio->bi_css = NULL;
+       }
        if (bio->bi_blkg) {
                blkg_put(bio->bi_blkg);
                bio->bi_blkg = NULL;
@@ -2120,16 +2041,16 @@ void bio_disassociate_task(struct bio *bio)
 }
 
 /**
- * bio_clone_blkg_association - clone blkg association from src to dst bio
+ * bio_clone_blkcg_association - clone blkcg association from src to dst bio
  * @dst: destination bio
  * @src: source bio
  */
-void bio_clone_blkg_association(struct bio *dst, struct bio *src)
+void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
 {
-       if (src->bi_blkg)
-               bio_associate_blkg(dst, src->bi_blkg);
+       if (src->bi_css)
+               WARN_ON(bio_associate_blkcg(dst, src->bi_css));
 }
-EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
+EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
 #endif /* CONFIG_BLK_CGROUP */
 
 static void __init biovec_init_slabs(void)
index 992da5592c6ed14208116b794975c75c2b3986a1..c630e02836a80d7d406778208c659aebda8fcf06 100644 (file)
@@ -84,37 +84,6 @@ static void blkg_free(struct blkcg_gq *blkg)
        kfree(blkg);
 }
 
-static void __blkg_release(struct rcu_head *rcu)
-{
-       struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
-
-       percpu_ref_exit(&blkg->refcnt);
-
-       /* release the blkcg and parent blkg refs this blkg has been holding */
-       css_put(&blkg->blkcg->css);
-       if (blkg->parent)
-               blkg_put(blkg->parent);
-
-       wb_congested_put(blkg->wb_congested);
-
-       blkg_free(blkg);
-}
-
-/*
- * A group is RCU protected, but having an rcu lock does not mean that one
- * can access all the fields of blkg and assume these are valid.  For
- * example, don't try to follow throtl_data and request queue links.
- *
- * Having a reference to blkg under an rcu allows accesses to only values
- * local to groups like group stats and group rate limits.
- */
-static void blkg_release(struct percpu_ref *ref)
-{
-       struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt);
-
-       call_rcu(&blkg->rcu_head, __blkg_release);
-}
-
 /**
  * blkg_alloc - allocate a blkg
  * @blkcg: block cgroup the new blkg is associated with
@@ -141,6 +110,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
        blkg->q = q;
        INIT_LIST_HEAD(&blkg->q_node);
        blkg->blkcg = blkcg;
+       atomic_set(&blkg->refcnt, 1);
 
        /* root blkg uses @q->root_rl, init rl only for !root blkgs */
        if (blkcg != &blkcg_root) {
@@ -247,11 +217,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
                blkg_get(blkg->parent);
        }
 
-       ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0,
-                             GFP_NOWAIT | __GFP_NOWARN);
-       if (ret)
-               goto err_cancel_ref;
-
        /* invoke per-policy init */
        for (i = 0; i < BLKCG_MAX_POLS; i++) {
                struct blkcg_policy *pol = blkcg_policy[i];
@@ -284,8 +249,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
        blkg_put(blkg);
        return ERR_PTR(ret);
 
-err_cancel_ref:
-       percpu_ref_exit(&blkg->refcnt);
 err_put_congested:
        wb_congested_put(wb_congested);
 err_put_css:
@@ -296,7 +259,7 @@ err_free_blkg:
 }
 
 /**
- * __blkg_lookup_create - lookup blkg, try to create one if not there
+ * blkg_lookup_create - lookup blkg, try to create one if not there
  * @blkcg: blkcg of interest
  * @q: request_queue of interest
  *
@@ -305,11 +268,12 @@ err_free_blkg:
  * that all non-root blkg's have access to the parent blkg.  This function
  * should be called under RCU read lock and @q->queue_lock.
  *
- * Returns the blkg or the closest blkg if blkg_create fails as it walks
- * down from root.
+ * Returns pointer to the looked up or created blkg on success, ERR_PTR()
+ * value on error.  If @q is dead, returns ERR_PTR(-EINVAL).  If @q is not
+ * dead and bypassing, returns ERR_PTR(-EBUSY).
  */
-struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
-                                     struct request_queue *q)
+struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
+                                   struct request_queue *q)
 {
        struct blkcg_gq *blkg;
 
@@ -321,7 +285,7 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
         * we shouldn't allow anything to go through for a bypassing queue.
         */
        if (unlikely(blk_queue_bypass(q)))
-               return q->root_blkg;
+               return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
 
        blkg = __blkg_lookup(blkcg, q, true);
        if (blkg)
@@ -329,58 +293,23 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
 
        /*
         * Create blkgs walking down from blkcg_root to @blkcg, so that all
-        * non-root blkgs have access to their parents.  Returns the closest
-        * blkg to the intended blkg should blkg_create() fail.
+        * non-root blkgs have access to their parents.
         */
        while (true) {
                struct blkcg *pos = blkcg;
                struct blkcg *parent = blkcg_parent(blkcg);
-               struct blkcg_gq *ret_blkg = q->root_blkg;
-
-               while (parent) {
-                       blkg = __blkg_lookup(parent, q, false);
-                       if (blkg) {
-                               /* remember closest blkg */
-                               ret_blkg = blkg;
-                               break;
-                       }
+
+               while (parent && !__blkg_lookup(parent, q, false)) {
                        pos = parent;
                        parent = blkcg_parent(parent);
                }
 
                blkg = blkg_create(pos, q, NULL);
-               if (IS_ERR(blkg))
-                       return ret_blkg;
-               if (pos == blkcg)
+               if (pos == blkcg || IS_ERR(blkg))
                        return blkg;
        }
 }
 
-/**
- * blkg_lookup_create - find or create a blkg
- * @blkcg: target block cgroup
- * @q: target request_queue
- *
- * This looks up or creates the blkg representing the unique pair
- * of the blkcg and the request_queue.
- */
-struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
-                                   struct request_queue *q)
-{
-       struct blkcg_gq *blkg = blkg_lookup(blkcg, q);
-       unsigned long flags;
-
-       if (unlikely(!blkg)) {
-               spin_lock_irqsave(q->queue_lock, flags);
-
-               blkg = __blkg_lookup_create(blkcg, q);
-
-               spin_unlock_irqrestore(q->queue_lock, flags);
-       }
-
-       return blkg;
-}
-
 static void blkg_destroy(struct blkcg_gq *blkg)
 {
        struct blkcg *blkcg = blkg->blkcg;
@@ -424,7 +353,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
         * Put the reference taken at the time of creation so that when all
         * queues are gone, group can be destroyed.
         */
-       percpu_ref_kill(&blkg->refcnt);
+       blkg_put(blkg);
 }
 
 /**
@@ -451,6 +380,29 @@ static void blkg_destroy_all(struct request_queue *q)
        q->root_rl.blkg = NULL;
 }
 
+/*
+ * A group is RCU protected, but having an rcu lock does not mean that one
+ * can access all the fields of blkg and assume these are valid.  For
+ * example, don't try to follow throtl_data and request queue links.
+ *
+ * Having a reference to blkg under an rcu allows accesses to only values
+ * local to groups like group stats and group rate limits.
+ */
+void __blkg_release_rcu(struct rcu_head *rcu_head)
+{
+       struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
+
+       /* release the blkcg and parent blkg refs this blkg has been holding */
+       css_put(&blkg->blkcg->css);
+       if (blkg->parent)
+               blkg_put(blkg->parent);
+
+       wb_congested_put(blkg->wb_congested);
+
+       blkg_free(blkg);
+}
+EXPORT_SYMBOL_GPL(__blkg_release_rcu);
+
 /*
  * The next function used by blk_queue_for_each_rl().  It's a bit tricky
  * because the root blkg uses @q->root_rl instead of its own rl.
@@ -1796,7 +1748,8 @@ void blkcg_maybe_throttle_current(void)
        blkg = blkg_lookup(blkcg, q);
        if (!blkg)
                goto out;
-       if (!blkg_tryget(blkg))
+       blkg = blkg_try_get(blkg);
+       if (!blkg)
                goto out;
        rcu_read_unlock();
 
index bc6ea87d10e02cffcaedec7cc9d4567d88cdd6b6..ce12515f9b9b9930da4515ed8f70a4cf5f5b946f 100644 (file)
@@ -785,6 +785,9 @@ void blk_cleanup_queue(struct request_queue *q)
         * prevent that q->request_fn() gets invoked after draining finished.
         */
        blk_freeze_queue(q);
+
+       rq_qos_exit(q);
+
        spin_lock_irq(lock);
        queue_flag_set(QUEUE_FLAG_DEAD, q);
        spin_unlock_irq(lock);
@@ -2432,7 +2435,6 @@ blk_qc_t generic_make_request(struct bio *bio)
                        if (q)
                                blk_queue_exit(q);
                        q = bio->bi_disk->queue;
-                       bio_reassociate_blkg(q, bio);
                        flags = 0;
                        if (bio->bi_opf & REQ_NOWAIT)
                                flags = BLK_MQ_REQ_NOWAIT;
index 28f80d22752858a2b1fcdfefb5f079469ec480ee..38c35c32aff2dcf3fc0e9ac294a649f0be4a1cb1 100644 (file)
@@ -482,12 +482,34 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
                                     spinlock_t *lock)
 {
        struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
-       struct blkcg_gq *blkg = bio->bi_blkg;
+       struct blkcg *blkcg;
+       struct blkcg_gq *blkg;
+       struct request_queue *q = rqos->q;
        bool issue_as_root = bio_issue_as_root_blkg(bio);
 
        if (!blk_iolatency_enabled(blkiolat))
                return;
 
+       rcu_read_lock();
+       blkcg = bio_blkcg(bio);
+       bio_associate_blkcg(bio, &blkcg->css);
+       blkg = blkg_lookup(blkcg, q);
+       if (unlikely(!blkg)) {
+               if (!lock)
+                       spin_lock_irq(q->queue_lock);
+               blkg = blkg_lookup_create(blkcg, q);
+               if (IS_ERR(blkg))
+                       blkg = NULL;
+               if (!lock)
+                       spin_unlock_irq(q->queue_lock);
+       }
+       if (!blkg)
+               goto out;
+
+       bio_issue_init(&bio->bi_issue, bio_sectors(bio));
+       bio_associate_blkg(bio, blkg);
+out:
+       rcu_read_unlock();
        while (blkg && blkg->parent) {
                struct iolatency_grp *iolat = blkg_to_lat(blkg);
                if (!iolat) {
@@ -708,7 +730,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
                 * We could be exiting, don't access the pd unless we have a
                 * ref on the blkg.
                 */
-               if (!blkg_tryget(blkg))
+               if (!blkg_try_get(blkg))
                        continue;
 
                iolat = blkg_to_lat(blkg);
index 76f867ea9a9b92fdfa921843a6a0ffe2c4297087..e8b3bb9bf37595acea4ec434b3664f8ad6a8c2b8 100644 (file)
@@ -51,16 +51,12 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
        if ((sector | nr_sects) & bs_mask)
                return -EINVAL;
 
-       while (nr_sects) {
-               unsigned int req_sects = nr_sects;
-               sector_t end_sect;
-
-               if (!req_sects)
-                       goto fail;
-               if (req_sects > UINT_MAX >> 9)
-                       req_sects = UINT_MAX >> 9;
+       if (!nr_sects)
+               return -EINVAL;
 
-               end_sect = sector + req_sects;
+       while (nr_sects) {
+               unsigned int req_sects = min_t(unsigned int, nr_sects,
+                               bio_allowed_max_sectors(q));
 
                bio = blk_next_bio(bio, 0, gfp_mask);
                bio->bi_iter.bi_sector = sector;
@@ -68,8 +64,8 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                bio_set_op_attrs(bio, op, 0);
 
                bio->bi_iter.bi_size = req_sects << 9;
+               sector += req_sects;
                nr_sects -= req_sects;
-               sector = end_sect;
 
                /*
                 * We can loop for a long time in here, if someone does
@@ -82,14 +78,6 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 
        *biop = bio;
        return 0;
-
-fail:
-       if (bio) {
-               submit_bio_wait(bio);
-               bio_put(bio);
-       }
-       *biop = NULL;
-       return -EOPNOTSUPP;
 }
 EXPORT_SYMBOL(__blkdev_issue_discard);
 
@@ -161,7 +149,7 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
                return -EOPNOTSUPP;
 
        /* Ensure that max_write_same_sectors doesn't overflow bi_size */
-       max_write_same_sectors = UINT_MAX >> 9;
+       max_write_same_sectors = bio_allowed_max_sectors(q);
 
        while (nr_sects) {
                bio = blk_next_bio(bio, 1, gfp_mask);
index 42a46744c11b45e4970bbe8a918fcf8b29d895d8..e7696c47489ad1f8caa11a64ff8fdb2b6d0f41a2 100644 (file)
@@ -46,7 +46,7 @@ static inline bool bio_will_gap(struct request_queue *q,
                bio_get_first_bvec(prev_rq->bio, &pb);
        else
                bio_get_first_bvec(prev, &pb);
-       if (pb.bv_offset)
+       if (pb.bv_offset & queue_virt_boundary(q))
                return true;
 
        /*
@@ -90,7 +90,8 @@ static struct bio *blk_bio_discard_split(struct request_queue *q,
        /* Zero-sector (unknown) and one-sector granularities are the same.  */
        granularity = max(q->limits.discard_granularity >> 9, 1U);
 
-       max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+       max_discard_sectors = min(q->limits.max_discard_sectors,
+                       bio_allowed_max_sectors(q));
        max_discard_sectors -= max_discard_sectors % granularity;
 
        if (unlikely(!max_discard_sectors)) {
@@ -714,6 +715,31 @@ static void blk_account_io_merge(struct request *req)
                part_stat_unlock();
        }
 }
+/*
+ * Two cases of handling DISCARD merge:
+ * If max_discard_segments > 1, the driver takes every bio
+ * as a range and sends them to the controller together. The
+ * ranges needn't be contiguous.
+ * Otherwise, the bios/requests will be handled the same as
+ * others, which must be contiguous.
+ */
+static inline bool blk_discard_mergable(struct request *req)
+{
+       if (req_op(req) == REQ_OP_DISCARD &&
+           queue_max_discard_segments(req->q) > 1)
+               return true;
+       return false;
+}
+
+enum elv_merge blk_try_req_merge(struct request *req, struct request *next)
+{
+       if (blk_discard_mergable(req))
+               return ELEVATOR_DISCARD_MERGE;
+       else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
+               return ELEVATOR_BACK_MERGE;
+
+       return ELEVATOR_NO_MERGE;
+}
 
 /*
  * For non-mq, this has to be called with the request spinlock acquired.
@@ -731,12 +757,6 @@ static struct request *attempt_merge(struct request_queue *q,
        if (req_op(req) != req_op(next))
                return NULL;
 
-       /*
-        * not contiguous
-        */
-       if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
-               return NULL;
-
        if (rq_data_dir(req) != rq_data_dir(next)
            || req->rq_disk != next->rq_disk
            || req_no_special_merge(next))
@@ -760,11 +780,19 @@ static struct request *attempt_merge(struct request_queue *q,
         * counts here. Handle DISCARDs separately, as they
         * have separate settings.
         */
-       if (req_op(req) == REQ_OP_DISCARD) {
+
+       switch (blk_try_req_merge(req, next)) {
+       case ELEVATOR_DISCARD_MERGE:
                if (!req_attempt_discard_merge(q, req, next))
                        return NULL;
-       } else if (!ll_merge_requests_fn(q, req, next))
+               break;
+       case ELEVATOR_BACK_MERGE:
+               if (!ll_merge_requests_fn(q, req, next))
+                       return NULL;
+               break;
+       default:
                return NULL;
+       }
 
        /*
         * If failfast settings disagree or any of the two is already
@@ -888,8 +916,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 
 enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
 {
-       if (req_op(rq) == REQ_OP_DISCARD &&
-           queue_max_discard_segments(rq->q) > 1)
+       if (blk_discard_mergable(rq))
                return ELEVATOR_DISCARD_MERGE;
        else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
                return ELEVATOR_BACK_MERGE;
index 0641533597f1b2cc389e6579a986d6df17115646..844a454a7b3a60a0c3186b589ab12daad0e9b3b8 100644 (file)
@@ -1007,8 +1007,6 @@ void blk_unregister_queue(struct gendisk *disk)
        kobject_del(&q->kobj);
        blk_trace_remove_sysfs(disk_to_dev(disk));
 
-       rq_qos_exit(q);
-
        mutex_lock(&q->sysfs_lock);
        if (q->request_fn || (q->mq_ops && q->elevator))
                elv_unregister_queue(q);
index 4bda70e8db48a9150880dc04a8a1f3fcb8844ac6..db1a3a2ae00617fbe1e4804bbfd327e37ce55737 100644 (file)
@@ -2115,11 +2115,21 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
 }
 #endif
 
+static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
+{
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+       /* fallback to root_blkg if we fail to get a blkg ref */
+       if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV))
+               bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg);
+       bio_issue_init(&bio->bi_issue, bio_sectors(bio));
+#endif
+}
+
 bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
                    struct bio *bio)
 {
        struct throtl_qnode *qn = NULL;
-       struct throtl_grp *tg = blkg_to_tg(blkg);
+       struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg);
        struct throtl_service_queue *sq;
        bool rw = bio_data_dir(bio);
        bool throttled = false;
@@ -2138,6 +2148,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
        if (unlikely(blk_queue_bypass(q)))
                goto out_unlock;
 
+       blk_throtl_assoc_bio(tg, bio);
        blk_throtl_update_idletime(tg);
 
        sq = &tg->service_queue;
index a1841b8ff12963a883047780762229f923989f01..0089fefdf771d7082ee05ca97504005090a26025 100644 (file)
@@ -169,7 +169,7 @@ static inline bool biovec_phys_mergeable(struct request_queue *q,
 static inline bool __bvec_gap_to_prev(struct request_queue *q,
                struct bio_vec *bprv, unsigned int offset)
 {
-       return offset ||
+       return (offset & queue_virt_boundary(q)) ||
                ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
 }
 
@@ -395,6 +395,16 @@ static inline unsigned long blk_rq_deadline(struct request *rq)
        return rq->__deadline & ~0x1UL;
 }
 
+/*
+ * The max size one bio can handle is UINT_MAX because bvec_iter.bi_size
+ * is defined as 'unsigned int'; meanwhile it has to be aligned to the
+ * logical block size, which is the minimum unit accepted by the hardware.
+ */
+static inline unsigned int bio_allowed_max_sectors(struct request_queue *q)
+{
+       return round_down(UINT_MAX, queue_logical_block_size(q)) >> 9;
+}
+
 /*
  * Internal io_context interface
  */
index cf49fe02f65cd017eb2132fd3475ba29a5d3cf75..36869afc258ccf6ea609e0e74db6cea56e6d2c34 100644 (file)
@@ -276,9 +276,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
                }
        }
 
-       bio_clone_blkg_association(bio, bio_src);
-
-       blkcg_bio_issue_init(bio);
+       bio_clone_blkcg_association(bio, bio_src);
 
        return bio;
 }
index 6a3d87dd3c1ac42abf04223ea14eb673e7c5ec5c..ed41aa978c4abc66cd2845f4c18177c97387a881 100644 (file)
@@ -3759,7 +3759,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
        uint64_t serial_nr;
 
        rcu_read_lock();
-       serial_nr = __bio_blkcg(bio)->css.serial_nr;
+       serial_nr = bio_blkcg(bio)->css.serial_nr;
        rcu_read_unlock();
 
        /*
@@ -3824,7 +3824,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
        struct cfq_group *cfqg;
 
        rcu_read_lock();
-       cfqg = cfq_lookup_cfqg(cfqd, __bio_blkcg(bio));
+       cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio));
        if (!cfqg) {
                cfqq = &cfqd->oom_cfqq;
                goto out;
index f3702e533ff41044694625aad813abc58b8af4dd..be70ca6c85d31e89329b032ce7445aa1c2b85f32 100644 (file)
@@ -21,6 +21,18 @@ config ASYMMETRIC_PUBLIC_KEY_SUBTYPE
          appropriate hash algorithms (such as SHA-1) must be available.
          ENOPKG will be reported if the requisite algorithm is unavailable.
 
+config ASYMMETRIC_TPM_KEY_SUBTYPE
+       tristate "Asymmetric TPM backed private key subtype"
+       depends on TCG_TPM
+       depends on TRUSTED_KEYS
+       select CRYPTO_HMAC
+       select CRYPTO_SHA1
+       select CRYPTO_HASH_INFO
+       help
+         This option provides support for TPM backed private key type handling.
+         Operations such as sign, verify, encrypt, decrypt are performed by
+         the TPM after the private key is loaded.
+
 config X509_CERTIFICATE_PARSER
        tristate "X.509 certificate parser"
        depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE
@@ -31,6 +43,25 @@ config X509_CERTIFICATE_PARSER
          data and provides the ability to instantiate a crypto key from a
          public key packet found inside the certificate.
 
+config PKCS8_PRIVATE_KEY_PARSER
+       tristate "PKCS#8 private key parser"
+       depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE
+       select ASN1
+       select OID_REGISTRY
+       help
+         This option provides support for parsing PKCS#8 format blobs for
+         private key data and provides the ability to instantiate a crypto key
+         from that data.
+
+config TPM_KEY_PARSER
+       tristate "TPM private key parser"
+       depends on ASYMMETRIC_TPM_KEY_SUBTYPE
+       select ASN1
+       help
+         This option provides support for parsing TPM format blobs for
+         private key data and provides the ability to instantiate a crypto key
+         from that data.
+
 config PKCS7_MESSAGE_PARSER
        tristate "PKCS#7 message parser"
        depends on X509_CERTIFICATE_PARSER
index d4b2e1b2dc650837ae98489799a3431ee1ce70d4..28b91adba2aed35f830e6c6f7d356004f1d5248c 100644 (file)
@@ -11,6 +11,7 @@ asymmetric_keys-y := \
        signature.o
 
 obj-$(CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE) += public_key.o
+obj-$(CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE) += asym_tpm.o
 
 #
 # X.509 Certificate handling
@@ -29,6 +30,19 @@ $(obj)/x509_cert_parser.o: \
 $(obj)/x509.asn1.o: $(obj)/x509.asn1.c $(obj)/x509.asn1.h
 $(obj)/x509_akid.asn1.o: $(obj)/x509_akid.asn1.c $(obj)/x509_akid.asn1.h
 
+#
+# PKCS#8 private key handling
+#
+obj-$(CONFIG_PKCS8_PRIVATE_KEY_PARSER) += pkcs8_key_parser.o
+pkcs8_key_parser-y := \
+       pkcs8.asn1.o \
+       pkcs8_parser.o
+
+$(obj)/pkcs8_parser.o: $(obj)/pkcs8.asn1.h
+$(obj)/pkcs8-asn1.o: $(obj)/pkcs8.asn1.c $(obj)/pkcs8.asn1.h
+
+clean-files    += pkcs8.asn1.c pkcs8.asn1.h
+
 #
 # PKCS#7 message handling
 #
@@ -61,3 +75,14 @@ verify_signed_pefile-y := \
 
 $(obj)/mscode_parser.o: $(obj)/mscode.asn1.h $(obj)/mscode.asn1.h
 $(obj)/mscode.asn1.o: $(obj)/mscode.asn1.c $(obj)/mscode.asn1.h
+
+#
+# TPM private key parsing
+#
+obj-$(CONFIG_TPM_KEY_PARSER) += tpm_key_parser.o
+tpm_key_parser-y := \
+       tpm.asn1.o \
+       tpm_parser.o
+
+$(obj)/tpm_parser.o: $(obj)/tpm.asn1.h
+$(obj)/tpm.asn1.o: $(obj)/tpm.asn1.c $(obj)/tpm.asn1.h
diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c
new file mode 100644 (file)
index 0000000..5d4c270
--- /dev/null
@@ -0,0 +1,988 @@
+// SPDX-License-Identifier: GPL-2.0
+#define pr_fmt(fmt) "ASYM-TPM: "fmt
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/scatterlist.h>
+#include <linux/tpm.h>
+#include <linux/tpm_command.h>
+#include <crypto/akcipher.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <asm/unaligned.h>
+#include <keys/asymmetric-subtype.h>
+#include <keys/trusted.h>
+#include <crypto/asym_tpm_subtype.h>
+#include <crypto/public_key.h>
+
+#define TPM_ORD_FLUSHSPECIFIC  186
+#define TPM_ORD_LOADKEY2       65
+#define TPM_ORD_UNBIND         30
+#define TPM_ORD_SIGN           60
+#define TPM_LOADKEY2_SIZE              59
+#define TPM_FLUSHSPECIFIC_SIZE         18
+#define TPM_UNBIND_SIZE                        63
+#define TPM_SIGN_SIZE                  63
+
+#define TPM_RT_KEY                      0x00000001
+
+/*
+ * Load a TPM key from the blob provided by userspace
+ */
+static int tpm_loadkey2(struct tpm_buf *tb,
+                       uint32_t keyhandle, unsigned char *keyauth,
+                       const unsigned char *keyblob, int keybloblen,
+                       uint32_t *newhandle)
+{
+       unsigned char nonceodd[TPM_NONCE_SIZE];
+       unsigned char enonce[TPM_NONCE_SIZE];
+       unsigned char authdata[SHA1_DIGEST_SIZE];
+       uint32_t authhandle = 0;
+       unsigned char cont = 0;
+       uint32_t ordinal;
+       int ret;
+
+       ordinal = htonl(TPM_ORD_LOADKEY2);
+
+       /* session for loading the key */
+       ret = oiap(tb, &authhandle, enonce);
+       if (ret < 0) {
+               pr_info("oiap failed (%d)\n", ret);
+               return ret;
+       }
+
+       /* generate odd nonce */
+       ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE);
+       if (ret < 0) {
+               pr_info("tpm_get_random failed (%d)\n", ret);
+               return ret;
+       }
+
+       /* calculate authorization HMAC value */
+       ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce,
+                          nonceodd, cont, sizeof(uint32_t), &ordinal,
+                          keybloblen, keyblob, 0, 0);
+       if (ret < 0)
+               return ret;
+
+       /* build the request buffer */
+       INIT_BUF(tb);
+       store16(tb, TPM_TAG_RQU_AUTH1_COMMAND);
+       store32(tb, TPM_LOADKEY2_SIZE + keybloblen);
+       store32(tb, TPM_ORD_LOADKEY2);
+       store32(tb, keyhandle);
+       storebytes(tb, keyblob, keybloblen);
+       store32(tb, authhandle);
+       storebytes(tb, nonceodd, TPM_NONCE_SIZE);
+       store8(tb, cont);
+       storebytes(tb, authdata, SHA1_DIGEST_SIZE);
+
+       ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
+       if (ret < 0) {
+               pr_info("authhmac failed (%d)\n", ret);
+               return ret;
+       }
+
+       ret = TSS_checkhmac1(tb->data, ordinal, nonceodd, keyauth,
+                            SHA1_DIGEST_SIZE, 0, 0);
+       if (ret < 0) {
+               pr_info("TSS_checkhmac1 failed (%d)\n", ret);
+               return ret;
+       }
+
+       *newhandle = LOAD32(tb->data, TPM_DATA_OFFSET);
+       return 0;
+}
+
+/*
+ * Execute the FlushSpecific TPM command
+ */
+static int tpm_flushspecific(struct tpm_buf *tb, uint32_t handle)
+{
+       INIT_BUF(tb);
+       store16(tb, TPM_TAG_RQU_COMMAND);
+       store32(tb, TPM_FLUSHSPECIFIC_SIZE);
+       store32(tb, TPM_ORD_FLUSHSPECIFIC);
+       store32(tb, handle);
+       store32(tb, TPM_RT_KEY);
+
+       return trusted_tpm_send(tb->data, MAX_BUF_SIZE);
+}
+
+/*
+ * Decrypt a blob provided by userspace using a specific key handle.
+ * The handle is a well known handle or previously loaded by e.g. LoadKey2
+ */
+static int tpm_unbind(struct tpm_buf *tb,
+                       uint32_t keyhandle, unsigned char *keyauth,
+                       const unsigned char *blob, uint32_t bloblen,
+                       void *out, uint32_t outlen)
+{
+       unsigned char nonceodd[TPM_NONCE_SIZE];
+       unsigned char enonce[TPM_NONCE_SIZE];
+       unsigned char authdata[SHA1_DIGEST_SIZE];
+       uint32_t authhandle = 0;
+       unsigned char cont = 0;
+       uint32_t ordinal;
+       uint32_t datalen;
+       int ret;
+
+       ordinal = htonl(TPM_ORD_UNBIND);
+       datalen = htonl(bloblen);
+
+       /* session for loading the key */
+       ret = oiap(tb, &authhandle, enonce);
+       if (ret < 0) {
+               pr_info("oiap failed (%d)\n", ret);
+               return ret;
+       }
+
+       /* generate odd nonce */
+       ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE);
+       if (ret < 0) {
+               pr_info("tpm_get_random failed (%d)\n", ret);
+               return ret;
+       }
+
+       /* calculate authorization HMAC value */
+       ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce,
+                          nonceodd, cont, sizeof(uint32_t), &ordinal,
+                          sizeof(uint32_t), &datalen,
+                          bloblen, blob, 0, 0);
+       if (ret < 0)
+               return ret;
+
+       /* build the request buffer */
+       INIT_BUF(tb);
+       store16(tb, TPM_TAG_RQU_AUTH1_COMMAND);
+       store32(tb, TPM_UNBIND_SIZE + bloblen);
+       store32(tb, TPM_ORD_UNBIND);
+       store32(tb, keyhandle);
+       store32(tb, bloblen);
+       storebytes(tb, blob, bloblen);
+       store32(tb, authhandle);
+       storebytes(tb, nonceodd, TPM_NONCE_SIZE);
+       store8(tb, cont);
+       storebytes(tb, authdata, SHA1_DIGEST_SIZE);
+
+       ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
+       if (ret < 0) {
+               pr_info("authhmac failed (%d)\n", ret);
+               return ret;
+       }
+
+       datalen = LOAD32(tb->data, TPM_DATA_OFFSET);
+
+       ret = TSS_checkhmac1(tb->data, ordinal, nonceodd,
+                            keyauth, SHA1_DIGEST_SIZE,
+                            sizeof(uint32_t), TPM_DATA_OFFSET,
+                            datalen, TPM_DATA_OFFSET + sizeof(uint32_t),
+                            0, 0);
+       if (ret < 0) {
+               pr_info("TSS_checkhmac1 failed (%d)\n", ret);
+               return ret;
+       }
+
+       memcpy(out, tb->data + TPM_DATA_OFFSET + sizeof(uint32_t),
+              min(outlen, datalen));
+
+       return datalen;
+}
+
+/*
+ * Sign a blob provided by userspace (that has had the hash function applied)
+ * using a specific key handle.  The handle is assumed to have been previously
+ * loaded by e.g. LoadKey2.
+ *
+ * Note that the key signature scheme of the used key should be set to
+ * TPM_SS_RSASSAPKCS1v15_DER.  This allows the hashed input to be of any size
+ * up to key_length_in_bytes - 11 and not be limited to size 20 like the
+ * TPM_SS_RSASSAPKCS1v15_SHA1 signature scheme.
+ */
+static int tpm_sign(struct tpm_buf *tb,
+                   uint32_t keyhandle, unsigned char *keyauth,
+                   const unsigned char *blob, uint32_t bloblen,
+                   void *out, uint32_t outlen)
+{
+       unsigned char nonceodd[TPM_NONCE_SIZE];
+       unsigned char enonce[TPM_NONCE_SIZE];
+       unsigned char authdata[SHA1_DIGEST_SIZE];
+       uint32_t authhandle = 0;
+       unsigned char cont = 0;
+       uint32_t ordinal;
+       uint32_t datalen;
+       int ret;
+
+       ordinal = htonl(TPM_ORD_SIGN);
+       datalen = htonl(bloblen);
+
+       /* session for loading the key */
+       ret = oiap(tb, &authhandle, enonce);
+       if (ret < 0) {
+               pr_info("oiap failed (%d)\n", ret);
+               return ret;
+       }
+
+       /* generate odd nonce */
+       ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE);
+       if (ret < 0) {
+               pr_info("tpm_get_random failed (%d)\n", ret);
+               return ret;
+       }
+
+       /* calculate authorization HMAC value */
+       ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce,
+                          nonceodd, cont, sizeof(uint32_t), &ordinal,
+                          sizeof(uint32_t), &datalen,
+                          bloblen, blob, 0, 0);
+       if (ret < 0)
+               return ret;
+
+       /* build the request buffer */
+       INIT_BUF(tb);
+       store16(tb, TPM_TAG_RQU_AUTH1_COMMAND);
+       store32(tb, TPM_SIGN_SIZE + bloblen);
+       store32(tb, TPM_ORD_SIGN);
+       store32(tb, keyhandle);
+       store32(tb, bloblen);
+       storebytes(tb, blob, bloblen);
+       store32(tb, authhandle);
+       storebytes(tb, nonceodd, TPM_NONCE_SIZE);
+       store8(tb, cont);
+       storebytes(tb, authdata, SHA1_DIGEST_SIZE);
+
+       ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
+       if (ret < 0) {
+               pr_info("authhmac failed (%d)\n", ret);
+               return ret;
+       }
+
+       datalen = LOAD32(tb->data, TPM_DATA_OFFSET);
+
+       ret = TSS_checkhmac1(tb->data, ordinal, nonceodd,
+                            keyauth, SHA1_DIGEST_SIZE,
+                            sizeof(uint32_t), TPM_DATA_OFFSET,
+                            datalen, TPM_DATA_OFFSET + sizeof(uint32_t),
+                            0, 0);
+       if (ret < 0) {
+               pr_info("TSS_checkhmac1 failed (%d)\n", ret);
+               return ret;
+       }
+
+       memcpy(out, tb->data + TPM_DATA_OFFSET + sizeof(uint32_t),
+              min(datalen, outlen));
+
+       return datalen;
+}
+/*
+ * Maximum buffer size for the BER/DER encoded public key.  The public key
+ * is of the form SEQUENCE { INTEGER n, INTEGER e } where n is a maximum 2048
+ * bit key and e is usually 65537
+ * The encoding overhead is:
+ * - max 4 bytes for SEQUENCE
+ *   - max 4 bytes for INTEGER n type/length
+ *     - 257 bytes of n
+ *   - max 2 bytes for INTEGER e type/length
+ *     - 3 bytes of e
+ */
+#define PUB_KEY_BUF_SIZE (4 + 4 + 257 + 2 + 3)
+
+/*
+ * Provide a part of a description of the key for /proc/keys.
+ */
+static void asym_tpm_describe(const struct key *asymmetric_key,
+                             struct seq_file *m)
+{
+       struct tpm_key *tk = asymmetric_key->payload.data[asym_crypto];
+
+       if (!tk)
+               return;
+
+       seq_printf(m, "TPM1.2/Blob");
+}
+
+static void asym_tpm_destroy(void *payload0, void *payload3)
+{
+       struct tpm_key *tk = payload0;
+
+       if (!tk)
+               return;
+
+       kfree(tk->blob);
+       tk->blob_len = 0;
+
+       kfree(tk);
+}
+
+/* How many bytes will it take to encode the length */
+static inline uint32_t definite_length(uint32_t len)
+{
+       if (len <= 127)
+               return 1;
+       if (len <= 255)
+               return 2;
+       return 3;
+}
+
+static inline uint8_t *encode_tag_length(uint8_t *buf, uint8_t tag,
+                                        uint32_t len)
+{
+       *buf++ = tag;
+
+       if (len <= 127) {
+               buf[0] = len;
+               return buf + 1;
+       }
+
+       if (len <= 255) {
+               buf[0] = 0x81;
+               buf[1] = len;
+               return buf + 2;
+       }
+
+       buf[0] = 0x82;
+       put_unaligned_be16(len, buf + 1);
+       return buf + 3;
+}
+
+static uint32_t derive_pub_key(const void *pub_key, uint32_t len, uint8_t *buf)
+{
+       uint8_t *cur = buf;
+       uint32_t n_len = definite_length(len) + 1 + len + 1;
+       uint32_t e_len = definite_length(3) + 1 + 3;
+       uint8_t e[3] = { 0x01, 0x00, 0x01 };
+
+       /* SEQUENCE */
+       cur = encode_tag_length(cur, 0x30, n_len + e_len);
+       /* INTEGER n */
+       cur = encode_tag_length(cur, 0x02, len + 1);
+       cur[0] = 0x00;
+       memcpy(cur + 1, pub_key, len);
+       cur += len + 1;
+       cur = encode_tag_length(cur, 0x02, sizeof(e));
+       memcpy(cur, e, sizeof(e));
+       cur += sizeof(e);
+
+       return cur - buf;
+}
+
+/*
+ * Determine the crypto algorithm name.
+ */
+static int determine_akcipher(const char *encoding, const char *hash_algo,
+                             char alg_name[CRYPTO_MAX_ALG_NAME])
+{
+       if (strcmp(encoding, "pkcs1") == 0) {
+               if (!hash_algo) {
+                       strcpy(alg_name, "pkcs1pad(rsa)");
+                       return 0;
+               }
+
+               if (snprintf(alg_name, CRYPTO_MAX_ALG_NAME, "pkcs1pad(rsa,%s)",
+                            hash_algo) >= CRYPTO_MAX_ALG_NAME)
+                       return -EINVAL;
+
+               return 0;
+       }
+
+       if (strcmp(encoding, "raw") == 0) {
+               strcpy(alg_name, "rsa");
+               return 0;
+       }
+
+       return -ENOPKG;
+}
+
+/*
+ * Query information about a key.
+ */
+static int tpm_key_query(const struct kernel_pkey_params *params,
+                        struct kernel_pkey_query *info)
+{
+       struct tpm_key *tk = params->key->payload.data[asym_crypto];
+       int ret;
+       char alg_name[CRYPTO_MAX_ALG_NAME];
+       struct crypto_akcipher *tfm;
+       uint8_t der_pub_key[PUB_KEY_BUF_SIZE];
+       uint32_t der_pub_key_len;
+       int len;
+
+       /* TPM only works on private keys, public keys still done in software */
+       ret = determine_akcipher(params->encoding, params->hash_algo, alg_name);
+       if (ret < 0)
+               return ret;
+
+       tfm = crypto_alloc_akcipher(alg_name, 0, 0);
+       if (IS_ERR(tfm))
+               return PTR_ERR(tfm);
+
+       der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len,
+                                        der_pub_key);
+
+       ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len);
+       if (ret < 0)
+               goto error_free_tfm;
+
+       len = crypto_akcipher_maxsize(tfm);
+
+       info->key_size = tk->key_len;
+       info->max_data_size = tk->key_len / 8;
+       info->max_sig_size = len;
+       info->max_enc_size = len;
+       info->max_dec_size = tk->key_len / 8;
+
+       info->supported_ops = KEYCTL_SUPPORTS_ENCRYPT |
+                             KEYCTL_SUPPORTS_DECRYPT |
+                             KEYCTL_SUPPORTS_VERIFY |
+                             KEYCTL_SUPPORTS_SIGN;
+
+       ret = 0;
+error_free_tfm:
+       crypto_free_akcipher(tfm);
+       pr_devel("<==%s() = %d\n", __func__, ret);
+       return ret;
+}
+
+/*
+ * Encryption operation is performed with the public key.  Hence it is done
+ * in software
+ */
+static int tpm_key_encrypt(struct tpm_key *tk,
+                          struct kernel_pkey_params *params,
+                          const void *in, void *out)
+{
+       char alg_name[CRYPTO_MAX_ALG_NAME];
+       struct crypto_akcipher *tfm;
+       struct akcipher_request *req;
+       struct crypto_wait cwait;
+       struct scatterlist in_sg, out_sg;
+       uint8_t der_pub_key[PUB_KEY_BUF_SIZE];
+       uint32_t der_pub_key_len;
+       int ret;
+
+       pr_devel("==>%s()\n", __func__);
+
+       ret = determine_akcipher(params->encoding, params->hash_algo, alg_name);
+       if (ret < 0)
+               return ret;
+
+       tfm = crypto_alloc_akcipher(alg_name, 0, 0);
+       if (IS_ERR(tfm))
+               return PTR_ERR(tfm);
+
+       der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len,
+                                        der_pub_key);
+
+       ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len);
+       if (ret < 0)
+               goto error_free_tfm;
+
+       req = akcipher_request_alloc(tfm, GFP_KERNEL);
+       if (!req)
+               goto error_free_tfm;
+
+       sg_init_one(&in_sg, in, params->in_len);
+       sg_init_one(&out_sg, out, params->out_len);
+       akcipher_request_set_crypt(req, &in_sg, &out_sg, params->in_len,
+                                  params->out_len);
+       crypto_init_wait(&cwait);
+       akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
+                                     CRYPTO_TFM_REQ_MAY_SLEEP,
+                                     crypto_req_done, &cwait);
+
+       ret = crypto_akcipher_encrypt(req);
+       ret = crypto_wait_req(ret, &cwait);
+
+       if (ret == 0)
+               ret = req->dst_len;
+
+       akcipher_request_free(req);
+error_free_tfm:
+       crypto_free_akcipher(tfm);
+       pr_devel("<==%s() = %d\n", __func__, ret);
+       return ret;
+}
+
+/*
+ * Decryption operation is performed with the private key in the TPM.
+ */
+static int tpm_key_decrypt(struct tpm_key *tk,
+                          struct kernel_pkey_params *params,
+                          const void *in, void *out)
+{
+       struct tpm_buf *tb;
+       uint32_t keyhandle;
+       uint8_t srkauth[SHA1_DIGEST_SIZE];
+       uint8_t keyauth[SHA1_DIGEST_SIZE];
+       int r;
+
+       pr_devel("==>%s()\n", __func__);
+
+       if (params->hash_algo)
+               return -ENOPKG;
+
+       if (strcmp(params->encoding, "pkcs1"))
+               return -ENOPKG;
+
+       tb = kzalloc(sizeof(*tb), GFP_KERNEL);
+       if (!tb)
+               return -ENOMEM;
+
+       /* TODO: Handle a non-all zero SRK authorization */
+       memset(srkauth, 0, sizeof(srkauth));
+
+       r = tpm_loadkey2(tb, SRKHANDLE, srkauth,
+                               tk->blob, tk->blob_len, &keyhandle);
+       if (r < 0) {
+               pr_devel("loadkey2 failed (%d)\n", r);
+               goto error;
+       }
+
+       /* TODO: Handle a non-all zero key authorization */
+       memset(keyauth, 0, sizeof(keyauth));
+
+       r = tpm_unbind(tb, keyhandle, keyauth,
+                      in, params->in_len, out, params->out_len);
+       if (r < 0)
+               pr_devel("tpm_unbind failed (%d)\n", r);
+
+       if (tpm_flushspecific(tb, keyhandle) < 0)
+               pr_devel("flushspecific failed (%d)\n", r);
+
+error:
+       kzfree(tb);
+       pr_devel("<==%s() = %d\n", __func__, r);
+       return r;
+}
+
+/*
+ * Hash algorithm OIDs plus ASN.1 DER wrappings [RFC4880 sec 5.2.2].
+ */
+static const u8 digest_info_md5[] = {
+       0x30, 0x20, 0x30, 0x0c, 0x06, 0x08,
+       0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x02, 0x05, /* OID */
+       0x05, 0x00, 0x04, 0x10
+};
+
+static const u8 digest_info_sha1[] = {
+       0x30, 0x21, 0x30, 0x09, 0x06, 0x05,
+       0x2b, 0x0e, 0x03, 0x02, 0x1a,
+       0x05, 0x00, 0x04, 0x14
+};
+
+static const u8 digest_info_rmd160[] = {
+       0x30, 0x21, 0x30, 0x09, 0x06, 0x05,
+       0x2b, 0x24, 0x03, 0x02, 0x01,
+       0x05, 0x00, 0x04, 0x14
+};
+
+static const u8 digest_info_sha224[] = {
+       0x30, 0x2d, 0x30, 0x0d, 0x06, 0x09,
+       0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04,
+       0x05, 0x00, 0x04, 0x1c
+};
+
+static const u8 digest_info_sha256[] = {
+       0x30, 0x31, 0x30, 0x0d, 0x06, 0x09,
+       0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01,
+       0x05, 0x00, 0x04, 0x20
+};
+
+static const u8 digest_info_sha384[] = {
+       0x30, 0x41, 0x30, 0x0d, 0x06, 0x09,
+       0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02,
+       0x05, 0x00, 0x04, 0x30
+};
+
+static const u8 digest_info_sha512[] = {
+       0x30, 0x51, 0x30, 0x0d, 0x06, 0x09,
+       0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03,
+       0x05, 0x00, 0x04, 0x40
+};
+
+static const struct asn1_template {
+       const char      *name;
+       const u8        *data;
+       size_t          size;
+} asn1_templates[] = {
+#define _(X) { #X, digest_info_##X, sizeof(digest_info_##X) }
+       _(md5),
+       _(sha1),
+       _(rmd160),
+       _(sha256),
+       _(sha384),
+       _(sha512),
+       _(sha224),
+       { NULL }
+#undef _
+};
+
+static const struct asn1_template *lookup_asn1(const char *name)
+{
+       const struct asn1_template *p;
+
+       for (p = asn1_templates; p->name; p++)
+               if (strcmp(name, p->name) == 0)
+                       return p;
+       return NULL;
+}
+
+/*
+ * Sign operation is performed with the private key in the TPM.
+ */
+static int tpm_key_sign(struct tpm_key *tk,
+                       struct kernel_pkey_params *params,
+                       const void *in, void *out)
+{
+       struct tpm_buf *tb;
+       uint32_t keyhandle;
+       uint8_t srkauth[SHA1_DIGEST_SIZE];
+       uint8_t keyauth[SHA1_DIGEST_SIZE];
+       void *asn1_wrapped = NULL;
+       uint32_t in_len = params->in_len;
+       int r;
+
+       pr_devel("==>%s()\n", __func__);
+
+       if (strcmp(params->encoding, "pkcs1"))
+               return -ENOPKG;
+
+       if (params->hash_algo) {
+               const struct asn1_template *asn1 =
+                                               lookup_asn1(params->hash_algo);
+
+               if (!asn1)
+                       return -ENOPKG;
+
+               /* request enough space for the ASN.1 template + input hash */
+               asn1_wrapped = kzalloc(in_len + asn1->size, GFP_KERNEL);
+               if (!asn1_wrapped)
+                       return -ENOMEM;
+
+               /* Copy ASN.1 template, then the input */
+               memcpy(asn1_wrapped, asn1->data, asn1->size);
+               memcpy(asn1_wrapped + asn1->size, in, in_len);
+
+               in = asn1_wrapped;
+               in_len += asn1->size;
+       }
+
+       if (in_len > tk->key_len / 8 - 11) {
+               r = -EOVERFLOW;
+               goto error_free_asn1_wrapped;
+       }
+
+       r = -ENOMEM;
+       tb = kzalloc(sizeof(*tb), GFP_KERNEL);
+       if (!tb)
+               goto error_free_asn1_wrapped;
+
+       /* TODO: Handle a non-all zero SRK authorization */
+       memset(srkauth, 0, sizeof(srkauth));
+
+       r = tpm_loadkey2(tb, SRKHANDLE, srkauth,
+                        tk->blob, tk->blob_len, &keyhandle);
+       if (r < 0) {
+               pr_devel("loadkey2 failed (%d)\n", r);
+               goto error_free_tb;
+       }
+
+       /* TODO: Handle a non-all zero key authorization */
+       memset(keyauth, 0, sizeof(keyauth));
+
+       r = tpm_sign(tb, keyhandle, keyauth, in, in_len, out, params->out_len);
+       if (r < 0)
+               pr_devel("tpm_sign failed (%d)\n", r);
+
+       if (tpm_flushspecific(tb, keyhandle) < 0)
+               pr_devel("flushspecific failed (%d)\n", r);
+
+error_free_tb:
+       kzfree(tb);
+error_free_asn1_wrapped:
+       kfree(asn1_wrapped);
+       pr_devel("<==%s() = %d\n", __func__, r);
+       return r;
+}
+
+/*
+ * Do encryption, decryption and signing ops.
+ */
+static int tpm_key_eds_op(struct kernel_pkey_params *params,
+                         const void *in, void *out)
+{
+       struct tpm_key *tk = params->key->payload.data[asym_crypto];
+       int ret = -EOPNOTSUPP;
+
+       /* Perform the encryption calculation. */
+       switch (params->op) {
+       case kernel_pkey_encrypt:
+               ret = tpm_key_encrypt(tk, params, in, out);
+               break;
+       case kernel_pkey_decrypt:
+               ret = tpm_key_decrypt(tk, params, in, out);
+               break;
+       case kernel_pkey_sign:
+               ret = tpm_key_sign(tk, params, in, out);
+               break;
+       default:
+               BUG();
+       }
+
+       return ret;
+}
+
+/*
+ * Verify a signature using a public key.
+ */
+static int tpm_key_verify_signature(const struct key *key,
+                                   const struct public_key_signature *sig)
+{
+       const struct tpm_key *tk = key->payload.data[asym_crypto];
+       struct crypto_wait cwait;
+       struct crypto_akcipher *tfm;
+       struct akcipher_request *req;
+       struct scatterlist sig_sg, digest_sg;
+       char alg_name[CRYPTO_MAX_ALG_NAME];
+       uint8_t der_pub_key[PUB_KEY_BUF_SIZE];
+       uint32_t der_pub_key_len;
+       void *output;
+       unsigned int outlen;
+       int ret;
+
+       pr_devel("==>%s()\n", __func__);
+
+       BUG_ON(!tk);
+       BUG_ON(!sig);
+       BUG_ON(!sig->s);
+
+       if (!sig->digest)
+               return -ENOPKG;
+
+       ret = determine_akcipher(sig->encoding, sig->hash_algo, alg_name);
+       if (ret < 0)
+               return ret;
+
+       tfm = crypto_alloc_akcipher(alg_name, 0, 0);
+       if (IS_ERR(tfm))
+               return PTR_ERR(tfm);
+
+       der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len,
+                                        der_pub_key);
+
+       ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len);
+       if (ret < 0)
+               goto error_free_tfm;
+
+       ret = -ENOMEM;
+       req = akcipher_request_alloc(tfm, GFP_KERNEL);
+       if (!req)
+               goto error_free_tfm;
+
+       ret = -ENOMEM;
+       outlen = crypto_akcipher_maxsize(tfm);
+       output = kmalloc(outlen, GFP_KERNEL);
+       if (!output)
+               goto error_free_req;
+
+       sg_init_one(&sig_sg, sig->s, sig->s_size);
+       sg_init_one(&digest_sg, output, outlen);
+       akcipher_request_set_crypt(req, &sig_sg, &digest_sg, sig->s_size,
+                                  outlen);
+       crypto_init_wait(&cwait);
+       akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
+                                     CRYPTO_TFM_REQ_MAY_SLEEP,
+                                     crypto_req_done, &cwait);
+
+       /* Perform the verification calculation.  This doesn't actually do the
+        * verification, but rather calculates the hash expected by the
+        * signature and returns that to us.
+        */
+       ret = crypto_wait_req(crypto_akcipher_verify(req), &cwait);
+       if (ret)
+               goto out_free_output;
+
+       /* Do the actual verification step. */
+       if (req->dst_len != sig->digest_size ||
+           memcmp(sig->digest, output, sig->digest_size) != 0)
+               ret = -EKEYREJECTED;
+
+out_free_output:
+       kfree(output);
+error_free_req:
+       akcipher_request_free(req);
+error_free_tfm:
+       crypto_free_akcipher(tfm);
+       pr_devel("<==%s() = %d\n", __func__, ret);
+       if (WARN_ON_ONCE(ret > 0))
+               ret = -EINVAL;
+       return ret;
+}
+
+/*
+ * Parse enough information out of TPM_KEY structure:
+ * TPM_STRUCT_VER -> 4 bytes
+ * TPM_KEY_USAGE -> 2 bytes
+ * TPM_KEY_FLAGS -> 4 bytes
+ * TPM_AUTH_DATA_USAGE -> 1 byte
+ * TPM_KEY_PARMS -> variable
+ * UINT32 PCRInfoSize -> 4 bytes
+ * BYTE* -> PCRInfoSize bytes
+ * TPM_STORE_PUBKEY
+ * UINT32 encDataSize;
+ * BYTE* -> encDataSize;
+ *
+ * TPM_KEY_PARMS:
+ * TPM_ALGORITHM_ID -> 4 bytes
+ * TPM_ENC_SCHEME -> 2 bytes
+ * TPM_SIG_SCHEME -> 2 bytes
+ * UINT32 parmSize -> 4 bytes
+ * BYTE* -> variable
+ */
+static int extract_key_parameters(struct tpm_key *tk)
+{
+       const void *cur = tk->blob;
+       uint32_t len = tk->blob_len;
+       const void *pub_key;
+       uint32_t sz;
+       uint32_t key_len;
+
+       if (len < 11)
+               return -EBADMSG;
+
+       /* Ensure this is a legacy key */
+       if (get_unaligned_be16(cur + 4) != 0x0015)
+               return -EBADMSG;
+
+       /* Skip to TPM_KEY_PARMS */
+       cur += 11;
+       len -= 11;
+
+       if (len < 12)
+               return -EBADMSG;
+
+       /* Make sure this is an RSA key */
+       if (get_unaligned_be32(cur) != 0x00000001)
+               return -EBADMSG;
+
+       /* Make sure this is TPM_ES_RSAESPKCSv15 encoding scheme */
+       if (get_unaligned_be16(cur + 4) != 0x0002)
+               return -EBADMSG;
+
+       /* Make sure this is TPM_SS_RSASSAPKCS1v15_DER signature scheme */
+       if (get_unaligned_be16(cur + 6) != 0x0003)
+               return -EBADMSG;
+
+       sz = get_unaligned_be32(cur + 8);
+       if (len < sz + 12)
+               return -EBADMSG;
+
+       /* Move to TPM_RSA_KEY_PARMS */
+       len -= 12;
+       cur += 12;
+
+       /* Grab the RSA key length */
+       key_len = get_unaligned_be32(cur);
+
+       switch (key_len) {
+       case 512:
+       case 1024:
+       case 1536:
+       case 2048:
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       /* Move just past TPM_KEY_PARMS */
+       cur += sz;
+       len -= sz;
+
+       if (len < 4)
+               return -EBADMSG;
+
+       sz = get_unaligned_be32(cur);
+       if (len < 4 + sz)
+               return -EBADMSG;
+
+       /* Move to TPM_STORE_PUBKEY */
+       cur += 4 + sz;
+       len -= 4 + sz;
+
+       /* Grab the size of the public key; it should jibe with the key size */
+       sz = get_unaligned_be32(cur);
+       if (sz > 256)
+               return -EINVAL;
+
+       pub_key = cur + 4;
+
+       tk->key_len = key_len;
+       tk->pub_key = pub_key;
+       tk->pub_key_len = sz;
+
+       return 0;
+}
+
+/* Given the blob, parse it and load it into the TPM */
+struct tpm_key *tpm_key_create(const void *blob, uint32_t blob_len)
+{
+       int r;
+       struct tpm_key *tk;
+
+       r = tpm_is_tpm2(NULL);
+       if (r < 0)
+               goto error;
+
+       /* We don't support TPM2 yet */
+       if (r > 0) {
+               r = -ENODEV;
+               goto error;
+       }
+
+       r = -ENOMEM;
+       tk = kzalloc(sizeof(struct tpm_key), GFP_KERNEL);
+       if (!tk)
+               goto error;
+
+       tk->blob = kmemdup(blob, blob_len, GFP_KERNEL);
+       if (!tk->blob)
+               goto error_memdup;
+
+       tk->blob_len = blob_len;
+
+       r = extract_key_parameters(tk);
+       if (r < 0)
+               goto error_extract;
+
+       return tk;
+
+error_extract:
+       kfree(tk->blob);
+       tk->blob_len = 0;
+error_memdup:
+       kfree(tk);
+error:
+       return ERR_PTR(r);
+}
+EXPORT_SYMBOL_GPL(tpm_key_create);
+
+/*
+ * TPM-based asymmetric key subtype
+ */
+struct asymmetric_key_subtype asym_tpm_subtype = {
+       .owner                  = THIS_MODULE,
+       .name                   = "asym_tpm",
+       .name_len               = sizeof("asym_tpm") - 1,
+       .describe               = asym_tpm_describe,
+       .destroy                = asym_tpm_destroy,
+       .query                  = tpm_key_query,
+       .eds_op                 = tpm_key_eds_op,
+       .verify_signature       = tpm_key_verify_signature,
+};
+EXPORT_SYMBOL_GPL(asym_tpm_subtype);
+
+MODULE_DESCRIPTION("TPM based asymmetric key subtype");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL v2");
index ca8e9ac34ce621613d29de02ba051eba593e7fc6..7be1ccf4fa9f2234c290e9bcffef773f176354aa 100644 (file)
@@ -16,3 +16,6 @@ extern struct asymmetric_key_id *asymmetric_key_hex_to_key_id(const char *id);
 extern int __asymmetric_key_hex_to_key_id(const char *id,
                                          struct asymmetric_key_id *match_id,
                                          size_t hexlen);
+
+extern int asymmetric_key_eds_op(struct kernel_pkey_params *params,
+                                const void *in, void *out);
index 26539e9a8bda41c37a664490e037f2365da7f15c..69a0788a7de5d08eddc6ad82451f2515ed94f6d4 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/ctype.h>
 #include <keys/system_keyring.h>
+#include <keys/user-type.h>
 #include "asymmetric_keys.h"
 
 MODULE_LICENSE("GPL");
@@ -538,6 +539,45 @@ out:
        return ret;
 }
 
+int asymmetric_key_eds_op(struct kernel_pkey_params *params,
+                         const void *in, void *out)
+{
+       const struct asymmetric_key_subtype *subtype;
+       struct key *key = params->key;
+       int ret;
+
+       pr_devel("==>%s()\n", __func__);
+
+       if (key->type != &key_type_asymmetric)
+               return -EINVAL;
+       subtype = asymmetric_key_subtype(key);
+       if (!subtype ||
+           !key->payload.data[0])
+               return -EINVAL;
+       if (!subtype->eds_op)
+               return -ENOTSUPP;
+
+       ret = subtype->eds_op(params, in, out);
+
+       pr_devel("<==%s() = %d\n", __func__, ret);
+       return ret;
+}
+
+static int asymmetric_key_verify_signature(struct kernel_pkey_params *params,
+                                          const void *in, const void *in2)
+{
+       struct public_key_signature sig = {
+               .s_size         = params->in2_len,
+               .digest_size    = params->in_len,
+               .encoding       = params->encoding,
+               .hash_algo      = params->hash_algo,
+               .digest         = (void *)in,
+               .s              = (void *)in2,
+       };
+
+       return verify_signature(params->key, &sig);
+}
+
 struct key_type key_type_asymmetric = {
        .name                   = "asymmetric",
        .preparse               = asymmetric_key_preparse,
@@ -548,6 +588,9 @@ struct key_type key_type_asymmetric = {
        .destroy                = asymmetric_key_destroy,
        .describe               = asymmetric_key_describe,
        .lookup_restriction     = asymmetric_lookup_restriction,
+       .asym_query             = query_asymmetric_key,
+       .asym_eds_op            = asymmetric_key_eds_op,
+       .asym_verify_signature  = asymmetric_key_verify_signature,
 };
 EXPORT_SYMBOL_GPL(key_type_asymmetric);
 
index 0f134162cef4b5f89c016db315df1b52de18ef16..f0d56e1a8b7e2b4971004959261b0d5c18cf0a6b 100644 (file)
@@ -271,6 +271,7 @@ int pkcs7_sig_note_pkey_algo(void *context, size_t hdrlen,
        switch (ctx->last_oid) {
        case OID_rsaEncryption:
                ctx->sinfo->sig->pkey_algo = "rsa";
+               ctx->sinfo->sig->encoding = "pkcs1";
                break;
        default:
                printk("Unsupported pkey algo: %u\n", ctx->last_oid);
diff --git a/crypto/asymmetric_keys/pkcs8.asn1 b/crypto/asymmetric_keys/pkcs8.asn1
new file mode 100644 (file)
index 0000000..702c41a
--- /dev/null
@@ -0,0 +1,24 @@
+--
+-- This is the unencrypted variant
+--
+PrivateKeyInfo ::= SEQUENCE {
+       version                 Version,
+       privateKeyAlgorithm     PrivateKeyAlgorithmIdentifier,
+       privateKey              PrivateKey,
+       attributes              [0] IMPLICIT Attributes OPTIONAL
+}
+
+Version ::= INTEGER  ({ pkcs8_note_version })
+
+PrivateKeyAlgorithmIdentifier ::= AlgorithmIdentifier ({ pkcs8_note_algo })
+
+PrivateKey ::= OCTET STRING ({ pkcs8_note_key })
+
+Attributes ::= SET OF Attribute
+
+Attribute ::= ANY
+
+AlgorithmIdentifier ::= SEQUENCE {
+       algorithm   OBJECT IDENTIFIER ({ pkcs8_note_OID }),
+       parameters  ANY OPTIONAL
+}
diff --git a/crypto/asymmetric_keys/pkcs8_parser.c b/crypto/asymmetric_keys/pkcs8_parser.c
new file mode 100644 (file)
index 0000000..5f6a7ec
--- /dev/null
@@ -0,0 +1,184 @@
+/* PKCS#8 Private Key parser [RFC 5208].
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "PKCS8: "fmt
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/oid_registry.h>
+#include <keys/asymmetric-subtype.h>
+#include <keys/asymmetric-parser.h>
+#include <crypto/public_key.h>
+#include "pkcs8.asn1.h"
+
+struct pkcs8_parse_context {
+       struct public_key *pub;
+       unsigned long   data;                   /* Start of data */
+       enum OID        last_oid;               /* Last OID encountered */
+       enum OID        algo_oid;               /* Algorithm OID */
+       u32             key_size;
+       const void      *key;
+};
+
+/*
+ * Note an OID when we find one for later processing when we know how to
+ * interpret it.
+ */
+int pkcs8_note_OID(void *context, size_t hdrlen,
+                  unsigned char tag,
+                  const void *value, size_t vlen)
+{
+       struct pkcs8_parse_context *ctx = context;
+
+       ctx->last_oid = look_up_OID(value, vlen);
+       if (ctx->last_oid == OID__NR) {
+               char buffer[50];
+
+               sprint_oid(value, vlen, buffer, sizeof(buffer));
+               pr_info("Unknown OID: [%lu] %s\n",
+                       (unsigned long)value - ctx->data, buffer);
+       }
+       return 0;
+}
+
+/*
+ * Note the version number of the ASN.1 blob.
+ */
+int pkcs8_note_version(void *context, size_t hdrlen,
+                      unsigned char tag,
+                      const void *value, size_t vlen)
+{
+       if (vlen != 1 || ((const u8 *)value)[0] != 0) {
+               pr_warn("Unsupported PKCS#8 version\n");
+               return -EBADMSG;
+       }
+       return 0;
+}
+
+/*
+ * Note the public algorithm.
+ */
+int pkcs8_note_algo(void *context, size_t hdrlen,
+                   unsigned char tag,
+                   const void *value, size_t vlen)
+{
+       struct pkcs8_parse_context *ctx = context;
+
+       if (ctx->last_oid != OID_rsaEncryption)
+               return -ENOPKG;
+
+       ctx->pub->pkey_algo = "rsa";
+       return 0;
+}
+
+/*
+ * Note the key data of the ASN.1 blob.
+ */
+int pkcs8_note_key(void *context, size_t hdrlen,
+                  unsigned char tag,
+                  const void *value, size_t vlen)
+{
+       struct pkcs8_parse_context *ctx = context;
+
+       ctx->key = value;
+       ctx->key_size = vlen;
+       return 0;
+}
+
+/*
+ * Parse a PKCS#8 private key blob.
+ */
+static struct public_key *pkcs8_parse(const void *data, size_t datalen)
+{
+       struct pkcs8_parse_context ctx;
+       struct public_key *pub;
+       long ret;
+
+       memset(&ctx, 0, sizeof(ctx));
+
+       ret = -ENOMEM;
+       ctx.pub = kzalloc(sizeof(struct public_key), GFP_KERNEL);
+       if (!ctx.pub)
+               goto error;
+
+       ctx.data = (unsigned long)data;
+
+       /* Attempt to decode the private key */
+       ret = asn1_ber_decoder(&pkcs8_decoder, &ctx, data, datalen);
+       if (ret < 0)
+               goto error_decode;
+
+       ret = -ENOMEM;
+       pub = ctx.pub;
+       pub->key = kmemdup(ctx.key, ctx.key_size, GFP_KERNEL);
+       if (!pub->key)
+               goto error_decode;
+
+       pub->keylen = ctx.key_size;
+       pub->key_is_private = true;
+       return pub;
+
+error_decode:
+       kfree(ctx.pub);
+error:
+       return ERR_PTR(ret);
+}
+
+/*
+ * Attempt to parse a data blob for a key as a PKCS#8 private key.
+ */
+static int pkcs8_key_preparse(struct key_preparsed_payload *prep)
+{
+       struct public_key *pub;
+
+       pub = pkcs8_parse(prep->data, prep->datalen);
+       if (IS_ERR(pub))
+               return PTR_ERR(pub);
+
+       pr_devel("Cert Key Algo: %s\n", pub->pkey_algo);
+       pub->id_type = "PKCS8";
+
+       /* We're pinning the module by being linked against it */
+       __module_get(public_key_subtype.owner);
+       prep->payload.data[asym_subtype] = &public_key_subtype;
+       prep->payload.data[asym_key_ids] = NULL;
+       prep->payload.data[asym_crypto] = pub;
+       prep->payload.data[asym_auth] = NULL;
+       prep->quotalen = 100;
+       return 0;
+}
+
+static struct asymmetric_key_parser pkcs8_key_parser = {
+       .owner  = THIS_MODULE,
+       .name   = "pkcs8",
+       .parse  = pkcs8_key_preparse,
+};
+
+/*
+ * Module stuff
+ */
+static int __init pkcs8_key_init(void)
+{
+       return register_asymmetric_key_parser(&pkcs8_key_parser);
+}
+
+static void __exit pkcs8_key_exit(void)
+{
+       unregister_asymmetric_key_parser(&pkcs8_key_parser);
+}
+
+module_init(pkcs8_key_init);
+module_exit(pkcs8_key_exit);
+
+MODULE_DESCRIPTION("PKCS#8 certificate parser");
+MODULE_LICENSE("GPL");
index e929fe1e4106c7dfaff7c2bcf3186952f449b764..f5d85b47fcc6d23be7315f5d1d245538649f00e7 100644 (file)
@@ -59,6 +59,165 @@ static void public_key_destroy(void *payload0, void *payload3)
        public_key_signature_free(payload3);
 }
 
+/*
+ * Determine the crypto algorithm name.
+ */
+static
+int software_key_determine_akcipher(const char *encoding,
+                                   const char *hash_algo,
+                                   const struct public_key *pkey,
+                                   char alg_name[CRYPTO_MAX_ALG_NAME])
+{
+       int n;
+
+       if (strcmp(encoding, "pkcs1") == 0) {
+               /* The data wangled by the RSA algorithm is typically padded
+                * and encoded in some manner, such as EMSA-PKCS1-1_5 [RFC3447
+                * sec 8.2].
+                */
+               if (!hash_algo)
+                       n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
+                                    "pkcs1pad(%s)",
+                                    pkey->pkey_algo);
+               else
+                       n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
+                                    "pkcs1pad(%s,%s)",
+                                    pkey->pkey_algo, hash_algo);
+               return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0;
+       }
+
+       if (strcmp(encoding, "raw") == 0) {
+               strcpy(alg_name, pkey->pkey_algo);
+               return 0;
+       }
+
+       return -ENOPKG;
+}
+
+/*
+ * Query information about a key.
+ */
+static int software_key_query(const struct kernel_pkey_params *params,
+                             struct kernel_pkey_query *info)
+{
+       struct crypto_akcipher *tfm;
+       struct public_key *pkey = params->key->payload.data[asym_crypto];
+       char alg_name[CRYPTO_MAX_ALG_NAME];
+       int ret, len;
+
+       ret = software_key_determine_akcipher(params->encoding,
+                                             params->hash_algo,
+                                             pkey, alg_name);
+       if (ret < 0)
+               return ret;
+
+       tfm = crypto_alloc_akcipher(alg_name, 0, 0);
+       if (IS_ERR(tfm))
+               return PTR_ERR(tfm);
+
+       if (pkey->key_is_private)
+               ret = crypto_akcipher_set_priv_key(tfm,
+                                                  pkey->key, pkey->keylen);
+       else
+               ret = crypto_akcipher_set_pub_key(tfm,
+                                                 pkey->key, pkey->keylen);
+       if (ret < 0)
+               goto error_free_tfm;
+
+       len = crypto_akcipher_maxsize(tfm);
+       info->key_size = len * 8;
+       info->max_data_size = len;
+       info->max_sig_size = len;
+       info->max_enc_size = len;
+       info->max_dec_size = len;
+       info->supported_ops = (KEYCTL_SUPPORTS_ENCRYPT |
+                              KEYCTL_SUPPORTS_VERIFY);
+       if (pkey->key_is_private)
+               info->supported_ops |= (KEYCTL_SUPPORTS_DECRYPT |
+                                       KEYCTL_SUPPORTS_SIGN);
+       ret = 0;
+
+error_free_tfm:
+       crypto_free_akcipher(tfm);
+       pr_devel("<==%s() = %d\n", __func__, ret);
+       return ret;
+}
+
+/*
+ * Do encryption, decryption and signing ops.
+ */
+static int software_key_eds_op(struct kernel_pkey_params *params,
+                              const void *in, void *out)
+{
+       const struct public_key *pkey = params->key->payload.data[asym_crypto];
+       struct akcipher_request *req;
+       struct crypto_akcipher *tfm;
+       struct crypto_wait cwait;
+       struct scatterlist in_sg, out_sg;
+       char alg_name[CRYPTO_MAX_ALG_NAME];
+       int ret;
+
+       pr_devel("==>%s()\n", __func__);
+
+       ret = software_key_determine_akcipher(params->encoding,
+                                             params->hash_algo,
+                                             pkey, alg_name);
+       if (ret < 0)
+               return ret;
+
+       tfm = crypto_alloc_akcipher(alg_name, 0, 0);
+       if (IS_ERR(tfm))
+               return PTR_ERR(tfm);
+
+       req = akcipher_request_alloc(tfm, GFP_KERNEL);
+       if (!req)
+               goto error_free_tfm;
+
+       if (pkey->key_is_private)
+               ret = crypto_akcipher_set_priv_key(tfm,
+                                                  pkey->key, pkey->keylen);
+       else
+               ret = crypto_akcipher_set_pub_key(tfm,
+                                                 pkey->key, pkey->keylen);
+       if (ret)
+               goto error_free_req;
+
+       sg_init_one(&in_sg, in, params->in_len);
+       sg_init_one(&out_sg, out, params->out_len);
+       akcipher_request_set_crypt(req, &in_sg, &out_sg, params->in_len,
+                                  params->out_len);
+       crypto_init_wait(&cwait);
+       akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
+                                     CRYPTO_TFM_REQ_MAY_SLEEP,
+                                     crypto_req_done, &cwait);
+
+       /* Perform the encryption calculation. */
+       switch (params->op) {
+       case kernel_pkey_encrypt:
+               ret = crypto_akcipher_encrypt(req);
+               break;
+       case kernel_pkey_decrypt:
+               ret = crypto_akcipher_decrypt(req);
+               break;
+       case kernel_pkey_sign:
+               ret = crypto_akcipher_sign(req);
+               break;
+       default:
+               BUG();
+       }
+
+       ret = crypto_wait_req(ret, &cwait);
+       if (ret == 0)
+               ret = req->dst_len;
+
+error_free_req:
+       akcipher_request_free(req);
+error_free_tfm:
+       crypto_free_akcipher(tfm);
+       pr_devel("<==%s() = %d\n", __func__, ret);
+       return ret;
+}
+
 /*
  * Verify a signature using a public key.
  */
@@ -69,8 +228,7 @@ int public_key_verify_signature(const struct public_key *pkey,
        struct crypto_akcipher *tfm;
        struct akcipher_request *req;
        struct scatterlist sig_sg, digest_sg;
-       const char *alg_name;
-       char alg_name_buf[CRYPTO_MAX_ALG_NAME];
+       char alg_name[CRYPTO_MAX_ALG_NAME];
        void *output;
        unsigned int outlen;
        int ret;
@@ -81,21 +239,11 @@ int public_key_verify_signature(const struct public_key *pkey,
        BUG_ON(!sig);
        BUG_ON(!sig->s);
 
-       if (!sig->digest)
-               return -ENOPKG;
-
-       alg_name = sig->pkey_algo;
-       if (strcmp(sig->pkey_algo, "rsa") == 0) {
-               /* The data wangled by the RSA algorithm is typically padded
-                * and encoded in some manner, such as EMSA-PKCS1-1_5 [RFC3447
-                * sec 8.2].
-                */
-               if (snprintf(alg_name_buf, CRYPTO_MAX_ALG_NAME,
-                            "pkcs1pad(rsa,%s)", sig->hash_algo
-                            ) >= CRYPTO_MAX_ALG_NAME)
-                       return -EINVAL;
-               alg_name = alg_name_buf;
-       }
+       ret = software_key_determine_akcipher(sig->encoding,
+                                             sig->hash_algo,
+                                             pkey, alg_name);
+       if (ret < 0)
+               return ret;
 
        tfm = crypto_alloc_akcipher(alg_name, 0, 0);
        if (IS_ERR(tfm))
@@ -106,7 +254,12 @@ int public_key_verify_signature(const struct public_key *pkey,
        if (!req)
                goto error_free_tfm;
 
-       ret = crypto_akcipher_set_pub_key(tfm, pkey->key, pkey->keylen);
+       if (pkey->key_is_private)
+               ret = crypto_akcipher_set_priv_key(tfm,
+                                                  pkey->key, pkey->keylen);
+       else
+               ret = crypto_akcipher_set_pub_key(tfm,
+                                                 pkey->key, pkey->keylen);
        if (ret)
                goto error_free_req;
 
@@ -167,6 +320,8 @@ struct asymmetric_key_subtype public_key_subtype = {
        .name_len               = sizeof("public_key") - 1,
        .describe               = public_key_describe,
        .destroy                = public_key_destroy,
+       .query                  = software_key_query,
+       .eds_op                 = software_key_eds_op,
        .verify_signature       = public_key_verify_signature_2,
 };
 EXPORT_SYMBOL_GPL(public_key_subtype);
index 28198314bc39f4f4a27da38565619f6e262a7fd4..ad95a58c664275a9d561548fa79e6a027a74db0c 100644 (file)
@@ -16,7 +16,9 @@
 #include <linux/export.h>
 #include <linux/err.h>
 #include <linux/slab.h>
+#include <linux/keyctl.h>
 #include <crypto/public_key.h>
+#include <keys/user-type.h>
 #include "asymmetric_keys.h"
 
 /*
@@ -36,6 +38,99 @@ void public_key_signature_free(struct public_key_signature *sig)
 }
 EXPORT_SYMBOL_GPL(public_key_signature_free);
 
+/**
+ * query_asymmetric_key - Get information about an asymmetric key.
+ * @params: Various parameters.
+ * @info: Where to put the information.
+ */
+int query_asymmetric_key(const struct kernel_pkey_params *params,
+                        struct kernel_pkey_query *info)
+{
+       const struct asymmetric_key_subtype *subtype;
+       struct key *key = params->key;
+       int ret;
+
+       pr_devel("==>%s()\n", __func__);
+
+       if (key->type != &key_type_asymmetric)
+               return -EINVAL;
+       subtype = asymmetric_key_subtype(key);
+       if (!subtype ||
+           !key->payload.data[0])
+               return -EINVAL;
+       if (!subtype->query)
+               return -ENOTSUPP;
+
+       ret = subtype->query(params, info);
+
+       pr_devel("<==%s() = %d\n", __func__, ret);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(query_asymmetric_key);
+
+/**
+ * encrypt_blob - Encrypt data using an asymmetric key
+ * @params: Various parameters
+ * @data: Data blob to be encrypted, length params->in_len
+ * @enc: Encrypted data buffer, length params->out_len
+ *
+ * Encrypt the specified data blob using the private key specified by
+ * params->key.  The encrypted data is wrapped in an encoding if
+ * params->encoding is specified (eg. "pkcs1").
+ *
+ * Returns the length of the data placed in the encrypted data buffer or an
+ * error.
+ */
+int encrypt_blob(struct kernel_pkey_params *params,
+                const void *data, void *enc)
+{
+       params->op = kernel_pkey_encrypt;
+       return asymmetric_key_eds_op(params, data, enc);
+}
+EXPORT_SYMBOL_GPL(encrypt_blob);
+
+/**
+ * decrypt_blob - Decrypt data using an asymmetric key
+ * @params: Various parameters
+ * @enc: Encrypted data to be decrypted, length params->in_len
+ * @data: Decrypted data buffer, length params->out_len
+ *
+ * Decrypt the specified data blob using the private key specified by
+ * params->key.  The decrypted data is wrapped in an encoding if
+ * params->encoding is specified (eg. "pkcs1").
+ *
+ * Returns the length of the data placed in the decrypted data buffer or an
+ * error.
+ */
+int decrypt_blob(struct kernel_pkey_params *params,
+                const void *enc, void *data)
+{
+       params->op = kernel_pkey_decrypt;
+       return asymmetric_key_eds_op(params, enc, data);
+}
+EXPORT_SYMBOL_GPL(decrypt_blob);
+
+/**
+ * create_signature - Sign some data using an asymmetric key
+ * @params: Various parameters
+ * @data: Data blob to be signed, length params->in_len
+ * @enc: Signature buffer, length params->out_len
+ *
+ * Sign the specified data blob using the private key specified by params->key.
+ * The signature is wrapped in an encoding if params->encoding is specified
+ * (eg. "pkcs1").  If the encoding needs to know the digest type, this can be
+ * passed through params->hash_algo (eg. "sha1").
+ *
+ * Returns the length of the data placed in the signature buffer or an error.
+ */
+int create_signature(struct kernel_pkey_params *params,
+                    const void *data, void *enc)
+{
+       params->op = kernel_pkey_sign;
+       return asymmetric_key_eds_op(params, data, enc);
+}
+EXPORT_SYMBOL_GPL(create_signature);
+
 /**
  * verify_signature - Initiate the use of an asymmetric key to verify a signature
  * @key: The asymmetric key to verify against
diff --git a/crypto/asymmetric_keys/tpm.asn1 b/crypto/asymmetric_keys/tpm.asn1
new file mode 100644 (file)
index 0000000..d7f1942
--- /dev/null
@@ -0,0 +1,5 @@
+--
+-- Unencrypted TPM Blob.  For details of the format, see:
+-- http://david.woodhou.se/draft-woodhouse-cert-best-practice.html#I-D.mavrogiannopoulos-tpmuri
+--
+PrivateKeyInfo ::= OCTET STRING ({ tpm_note_key })
diff --git a/crypto/asymmetric_keys/tpm_parser.c b/crypto/asymmetric_keys/tpm_parser.c
new file mode 100644 (file)
index 0000000..96405d8
--- /dev/null
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+#define pr_fmt(fmt) "TPM-PARSER: "fmt
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <keys/asymmetric-subtype.h>
+#include <keys/asymmetric-parser.h>
+#include <crypto/asym_tpm_subtype.h>
+#include "tpm.asn1.h"
+
+struct tpm_parse_context {
+       const void      *blob;
+       u32             blob_len;
+};
+
+/*
+ * Note the key data of the ASN.1 blob.
+ */
+int tpm_note_key(void *context, size_t hdrlen,
+                  unsigned char tag,
+                  const void *value, size_t vlen)
+{
+       struct tpm_parse_context *ctx = context;
+
+       ctx->blob = value;
+       ctx->blob_len = vlen;
+
+       return 0;
+}
+
+/*
+ * Parse a TPM-encrypted private key blob.
+ */
+static struct tpm_key *tpm_parse(const void *data, size_t datalen)
+{
+       struct tpm_parse_context ctx;
+       long ret;
+
+       memset(&ctx, 0, sizeof(ctx));
+
+       /* Attempt to decode the private key */
+       ret = asn1_ber_decoder(&tpm_decoder, &ctx, data, datalen);
+       if (ret < 0)
+               goto error;
+
+       return tpm_key_create(ctx.blob, ctx.blob_len);
+
+error:
+       return ERR_PTR(ret);
+}
+/*
+ * Attempt to parse a data blob for a key as a TPM private key blob.
+ */
+static int tpm_key_preparse(struct key_preparsed_payload *prep)
+{
+       struct tpm_key *tk;
+
+       /*
+        * TPM 1.2 keys are max 2048 bits long, so assume the blob is no
+        * more than 4x that
+        */
+       if (prep->datalen > 256 * 4)
+               return -EMSGSIZE;
+
+       tk = tpm_parse(prep->data, prep->datalen);
+
+       if (IS_ERR(tk))
+               return PTR_ERR(tk);
+
+       /* We're pinning the module by being linked against it */
+       __module_get(asym_tpm_subtype.owner);
+       prep->payload.data[asym_subtype] = &asym_tpm_subtype;
+       prep->payload.data[asym_key_ids] = NULL;
+       prep->payload.data[asym_crypto] = tk;
+       prep->payload.data[asym_auth] = NULL;
+       prep->quotalen = 100;
+       return 0;
+}
+
+static struct asymmetric_key_parser tpm_key_parser = {
+       .owner  = THIS_MODULE,
+       .name   = "tpm_parser",
+       .parse  = tpm_key_preparse,
+};
+
+static int __init tpm_key_init(void)
+{
+       return register_asymmetric_key_parser(&tpm_key_parser);
+}
+
+static void __exit tpm_key_exit(void)
+{
+       unregister_asymmetric_key_parser(&tpm_key_parser);
+}
+
+module_init(tpm_key_init);
+module_exit(tpm_key_exit);
+
+MODULE_DESCRIPTION("TPM private key-blob parser");
+MODULE_LICENSE("GPL v2");
index b6cabac4b62ba6b920cb5947c56db5839711bcc7..991f4d735a4ef1d89083f15cae8efa77f82060d6 100644 (file)
@@ -199,35 +199,32 @@ int x509_note_pkey_algo(void *context, size_t hdrlen,
 
        case OID_md4WithRSAEncryption:
                ctx->cert->sig->hash_algo = "md4";
-               ctx->cert->sig->pkey_algo = "rsa";
-               break;
+               goto rsa_pkcs1;
 
        case OID_sha1WithRSAEncryption:
                ctx->cert->sig->hash_algo = "sha1";
-               ctx->cert->sig->pkey_algo = "rsa";
-               break;
+               goto rsa_pkcs1;
 
        case OID_sha256WithRSAEncryption:
                ctx->cert->sig->hash_algo = "sha256";
-               ctx->cert->sig->pkey_algo = "rsa";
-               break;
+               goto rsa_pkcs1;
 
        case OID_sha384WithRSAEncryption:
                ctx->cert->sig->hash_algo = "sha384";
-               ctx->cert->sig->pkey_algo = "rsa";
-               break;
+               goto rsa_pkcs1;
 
        case OID_sha512WithRSAEncryption:
                ctx->cert->sig->hash_algo = "sha512";
-               ctx->cert->sig->pkey_algo = "rsa";
-               break;
+               goto rsa_pkcs1;
 
        case OID_sha224WithRSAEncryption:
                ctx->cert->sig->hash_algo = "sha224";
-               ctx->cert->sig->pkey_algo = "rsa";
-               break;
+               goto rsa_pkcs1;
        }
 
+rsa_pkcs1:
+       ctx->cert->sig->pkey_algo = "rsa";
+       ctx->cert->sig->encoding = "pkcs1";
        ctx->algo_oid = ctx->last_oid;
        return 0;
 }
index 812476e4682138225fd46fd2745062d5e1d55105..cfc04e15fd97506a6110c5845673db731a0cf757 100644 (file)
@@ -392,7 +392,8 @@ static int pkcs1pad_sign(struct akcipher_request *req)
        if (!ctx->key_size)
                return -EINVAL;
 
-       digest_size = digest_info->size;
+       if (digest_info)
+               digest_size = digest_info->size;
 
        if (req->src_len + digest_size > ctx->key_size - 11)
                return -EOVERFLOW;
@@ -412,8 +413,9 @@ static int pkcs1pad_sign(struct akcipher_request *req)
        memset(req_ctx->in_buf + 1, 0xff, ps_end - 1);
        req_ctx->in_buf[ps_end] = 0x00;
 
-       memcpy(req_ctx->in_buf + ps_end + 1, digest_info->data,
-              digest_info->size);
+       if (digest_info)
+               memcpy(req_ctx->in_buf + ps_end + 1, digest_info->data,
+                      digest_info->size);
 
        pkcs1pad_sg_set_buf(req_ctx->in_sg, req_ctx->in_buf,
                        ctx->key_size - 1 - req->src_len, req->src);
@@ -475,10 +477,13 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
                goto done;
        pos++;
 
-       if (crypto_memneq(out_buf + pos, digest_info->data, digest_info->size))
-               goto done;
+       if (digest_info) {
+               if (crypto_memneq(out_buf + pos, digest_info->data,
+                                 digest_info->size))
+                       goto done;
 
-       pos += digest_info->size;
+               pos += digest_info->size;
+       }
 
        err = 0;
 
@@ -608,11 +613,14 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
 
        hash_name = crypto_attr_alg_name(tb[2]);
        if (IS_ERR(hash_name))
-               return PTR_ERR(hash_name);
+               hash_name = NULL;
 
-       digest_info = rsa_lookup_asn1(hash_name);
-       if (!digest_info)
-               return -EINVAL;
+       if (hash_name) {
+               digest_info = rsa_lookup_asn1(hash_name);
+               if (!digest_info)
+                       return -EINVAL;
+       } else
+               digest_info = NULL;
 
        inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
        if (!inst)
@@ -632,14 +640,29 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
 
        err = -ENAMETOOLONG;
 
-       if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
-                    "pkcs1pad(%s,%s)", rsa_alg->base.cra_name, hash_name) >=
-           CRYPTO_MAX_ALG_NAME ||
-           snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
-                    "pkcs1pad(%s,%s)",
-                    rsa_alg->base.cra_driver_name, hash_name) >=
-           CRYPTO_MAX_ALG_NAME)
-               goto out_drop_alg;
+       if (!hash_name) {
+               if (snprintf(inst->alg.base.cra_name,
+                            CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)",
+                            rsa_alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME)
+                       goto out_drop_alg;
+
+               if (snprintf(inst->alg.base.cra_driver_name,
+                            CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)",
+                            rsa_alg->base.cra_driver_name) >=
+                            CRYPTO_MAX_ALG_NAME)
+                       goto out_drop_alg;
+       } else {
+               if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
+                            "pkcs1pad(%s,%s)", rsa_alg->base.cra_name,
+                            hash_name) >= CRYPTO_MAX_ALG_NAME)
+                       goto out_drop_alg;
+
+               if (snprintf(inst->alg.base.cra_driver_name,
+                            CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s,%s)",
+                            rsa_alg->base.cra_driver_name,
+                            hash_name) >= CRYPTO_MAX_ALG_NAME)
+                       goto out_drop_alg;
+       }
 
        inst->alg.base.cra_flags = rsa_alg->base.cra_flags & CRYPTO_ALG_ASYNC;
        inst->alg.base.cra_priority = rsa_alg->base.cra_priority;
index a7c2673ffd36e8a8287a40182fae189e6b71688e..824ae985ad93bebacbb70010e2e2c632a3d143a8 100644 (file)
@@ -126,6 +126,7 @@ int acpi_device_get_power(struct acpi_device *device, int *state)
 
        return 0;
 }
+EXPORT_SYMBOL(acpi_device_get_power);
 
 static int acpi_dev_pm_explicit_set(struct acpi_device *adev, int state)
 {
index 10ecb232245db8c617ee808966db432ece834358..4b1ff5bc256a3032191f090226ffb4c5d0286ae9 100644 (file)
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Renesas R-Car SATA driver
  *
  * Author: Vladimir Barinov <source@cogentembedded.com>
  * Copyright (C) 2013-2015 Cogent Embedded, Inc.
  * Copyright (C) 2013-2015 Renesas Solutions Corp.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
  */
 
 #include <linux/kernel.h>
index 3b25a643058c9dde38511d646da8700e53c46837..21b9b2f2470a26d1f2d1c2d5eb4237fe3902af82 100644 (file)
@@ -155,10 +155,9 @@ struct logical_input {
                        int release_data;
                } std;
                struct {        /* valid when type == INPUT_TYPE_KBD */
-                       /* strings can be non null-terminated */
-                       char press_str[sizeof(void *) + sizeof(int)];
-                       char repeat_str[sizeof(void *) + sizeof(int)];
-                       char release_str[sizeof(void *) + sizeof(int)];
+                       char press_str[sizeof(void *) + sizeof(int)] __nonstring;
+                       char repeat_str[sizeof(void *) + sizeof(int)] __nonstring;
+                       char release_str[sizeof(void *) + sizeof(int)] __nonstring;
                } kbd;
        } u;
 };
index df8103dd40ac2d1f6e5c29ef7790160a3335b35b..c18586fccb6f2b25c3d0ef535144deeabf640006 100644 (file)
@@ -396,15 +396,14 @@ static struct brd_device *brd_alloc(int i)
        disk->first_minor       = i * max_part;
        disk->fops              = &brd_fops;
        disk->private_data      = brd;
-       disk->queue             = brd->brd_queue;
        disk->flags             = GENHD_FL_EXT_DEVT;
        sprintf(disk->disk_name, "ram%d", i);
        set_capacity(disk, rd_size * 2);
-       disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
+       brd->brd_queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
 
        /* Tell the block layer that this is not a rotational device */
-       blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
-       blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
+       blk_queue_flag_set(QUEUE_FLAG_NONROT, brd->brd_queue);
+       blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, brd->brd_queue);
 
        return brd;
 
@@ -436,6 +435,7 @@ static struct brd_device *brd_init_one(int i, bool *new)
 
        brd = brd_alloc(i);
        if (brd) {
+               brd->brd_disk->queue = brd->brd_queue;
                add_disk(brd->brd_disk);
                list_add_tail(&brd->brd_list, &brd_devices);
        }
@@ -503,8 +503,14 @@ static int __init brd_init(void)
 
        /* point of no return */
 
-       list_for_each_entry(brd, &brd_devices, brd_list)
+       list_for_each_entry(brd, &brd_devices, brd_list) {
+               /*
+                * associate with queue just before adding disk for
+                * avoiding to mess up failure path
+                */
+               brd->brd_disk->queue = brd->brd_queue;
                add_disk(brd->brd_disk);
+       }
 
        blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS,
                                  THIS_MODULE, brd_probe, NULL, NULL);
index 55fd104f1ed4b91cf36b0d6cb1c8b9270443507a..fa8204214ac027adf660db960d4297d3f0cca7bb 100644 (file)
@@ -1856,7 +1856,7 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock,
 
        /* THINK  if (signal_pending) return ... ? */
 
-       iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, size);
+       iov_iter_kvec(&msg.msg_iter, WRITE, &iov, 1, size);
 
        if (sock == connection->data.socket) {
                rcu_read_lock();
index fc67fd853375c033a253753c02b8a3a8c23df4b2..61c392752fe4bbfeba5b1b404bf64cff7e9d8b00 100644 (file)
@@ -516,7 +516,7 @@ static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flag
        struct msghdr msg = {
                .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
        };
-       iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, size);
+       iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size);
        return sock_recvmsg(sock, &msg, msg.msg_flags);
 }
 
index abad6d15f956343ff86ad45d0f40ff4c7faae50b..cb0cc868507620513d3de7658ed1f6999ceaa965 100644 (file)
@@ -77,7 +77,6 @@
 #include <linux/falloc.h>
 #include <linux/uio.h>
 #include <linux/ioprio.h>
-#include <linux/blk-cgroup.h>
 
 #include "loop.h"
 
@@ -269,7 +268,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
        struct iov_iter i;
        ssize_t bw;
 
-       iov_iter_bvec(&i, ITER_BVEC | WRITE, bvec, 1, bvec->bv_len);
+       iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len);
 
        file_start_write(file);
        bw = vfs_iter_write(file, &i, ppos, 0);
@@ -347,7 +346,7 @@ static int lo_read_simple(struct loop_device *lo, struct request *rq,
        ssize_t len;
 
        rq_for_each_segment(bvec, rq, iter) {
-               iov_iter_bvec(&i, ITER_BVEC, &bvec, 1, bvec.bv_len);
+               iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len);
                len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
                if (len < 0)
                        return len;
@@ -388,7 +387,7 @@ static int lo_read_transfer(struct loop_device *lo, struct request *rq,
                b.bv_offset = 0;
                b.bv_len = bvec.bv_len;
 
-               iov_iter_bvec(&i, ITER_BVEC, &b, 1, b.bv_len);
+               iov_iter_bvec(&i, READ, &b, 1, b.bv_len);
                len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
                if (len < 0) {
                        ret = len;
@@ -555,8 +554,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
        }
        atomic_set(&cmd->ref, 2);
 
-       iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
-                     segments, blk_rq_bytes(rq));
+       iov_iter_bvec(&iter, rw, bvec, segments, blk_rq_bytes(rq));
        iter.iov_offset = offset;
 
        cmd->iocb.ki_pos = pos;
@@ -1761,8 +1759,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 
        /* always use the first bio's css */
 #ifdef CONFIG_BLK_CGROUP
-       if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) {
-               cmd->css = &bio_blkcg(rq->bio)->css;
+       if (cmd->use_aio && rq->bio && rq->bio->bi_css) {
+               cmd->css = rq->bio->bi_css;
                css_get(cmd->css);
        } else
 #endif
index dfc8de6ce5254872a8b96ec364d7cd3136f4f7d7..a7daa8acbab3a53feda3e1e92dcc51d86a32ba71 100644 (file)
@@ -1942,8 +1942,8 @@ static int exec_drive_taskfile(struct driver_data *dd,
                                dev_warn(&dd->pdev->dev,
                                        "data movement but "
                                        "sect_count is 0\n");
-                                       err = -EINVAL;
-                                       goto abort;
+                               err = -EINVAL;
+                               goto abort;
                        }
                }
        }
index 14a51254c3db7f19c94cdab62e1d9e192c7ae02f..4d4d6129ff6627f1249cade3101d2927d7db5a25 100644 (file)
@@ -473,7 +473,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
        u32 nbd_cmd_flags = 0;
        int sent = nsock->sent, skip = 0;
 
-       iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
+       iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
 
        switch (req_op(req)) {
        case REQ_OP_DISCARD:
@@ -564,8 +564,7 @@ send_pages:
 
                        dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
                                req, bvec.bv_len);
-                       iov_iter_bvec(&from, ITER_BVEC | WRITE,
-                                     &bvec, 1, bvec.bv_len);
+                       iov_iter_bvec(&from, WRITE, &bvec, 1, bvec.bv_len);
                        if (skip) {
                                if (skip >= iov_iter_count(&from)) {
                                        skip -= iov_iter_count(&from);
@@ -624,7 +623,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
        int ret = 0;
 
        reply.magic = 0;
-       iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
+       iov_iter_kvec(&to, READ, &iov, 1, sizeof(reply));
        result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
        if (result <= 0) {
                if (!nbd_disconnected(config))
@@ -678,8 +677,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
                struct bio_vec bvec;
 
                rq_for_each_segment(bvec, req, iter) {
-                       iov_iter_bvec(&to, ITER_BVEC | READ,
-                                     &bvec, 1, bvec.bv_len);
+                       iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len);
                        result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
                        if (result <= 0) {
                                dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
@@ -1073,7 +1071,7 @@ static void send_disconnects(struct nbd_device *nbd)
        for (i = 0; i < config->num_connections; i++) {
                struct nbd_sock *nsock = config->socks[i];
 
-               iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
+               iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
                mutex_lock(&nsock->tx_lock);
                ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
                if (ret <= 0)
index 56452cabce5b587cb7309f9ba24640bbf0ba05da..0ed4b200fa5855e10a142b6f6ce237901cf749ec 100644 (file)
@@ -1919,6 +1919,7 @@ static int negotiate_mq(struct blkfront_info *info)
                              GFP_KERNEL);
        if (!info->rinfo) {
                xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure");
+               info->nr_rings = 0;
                return -ENOMEM;
        }
 
index ef0ca9414f371bc3275b7afce529029e10b49f68..ff83e899df71fca602aadba443fdce73308eb9c5 100644 (file)
@@ -210,6 +210,7 @@ static int of_fixed_factor_clk_remove(struct platform_device *pdev)
 {
        struct clk *clk = platform_get_drvdata(pdev);
 
+       of_clk_del_provider(pdev->dev.of_node);
        clk_unregister_fixed_factor(clk);
 
        return 0;
index c981159b02c0f09c604a78005f26103c75962e9c..792735d7e46ea0faf3299f710813df3f98cd3834 100644 (file)
@@ -325,6 +325,7 @@ static struct clk_regmap axg_fclk_div2 = {
                .ops = &clk_regmap_gate_ops,
                .parent_names = (const char *[]){ "fclk_div2_div" },
                .num_parents = 1,
+               .flags = CLK_IS_CRITICAL,
        },
 };
 
@@ -349,6 +350,18 @@ static struct clk_regmap axg_fclk_div3 = {
                .ops = &clk_regmap_gate_ops,
                .parent_names = (const char *[]){ "fclk_div3_div" },
                .num_parents = 1,
+               /*
+                * FIXME:
+                * This clock, as fdiv2, is used by the SCPI FW and is required
+                * by the platform to operate correctly.
+                * Until the following conditions are met, we need this clock to
+                * be marked as critical:
+                * a) The SCPI generic driver claims and enables all the clocks
+                *    it needs
+                * b) CCF has a clock hand-off mechanism to make sure the
+                *    clock stays on until the proper driver comes along
+                */
+               .flags = CLK_IS_CRITICAL,
        },
 };
 
index 9309cfaaa464ebd5f3e7d26e174c3c8449e16208..4ada9668fd49c2596de2667aebccd841ee673bb5 100644 (file)
@@ -506,6 +506,18 @@ static struct clk_regmap gxbb_fclk_div3 = {
                .ops = &clk_regmap_gate_ops,
                .parent_names = (const char *[]){ "fclk_div3_div" },
                .num_parents = 1,
+               /*
+                * FIXME:
+                * This clock, as fdiv2, is used by the SCPI FW and is required
+                * by the platform to operate correctly.
+                * Until the following conditions are met, we need this clock to
+                * be marked as critical:
+                * a) The SCPI generic driver claims and enables all the clocks
+                *    it needs
+                * b) CCF has a clock hand-off mechanism to make sure the
+                *    clock stays on until the proper driver comes along
+                */
+               .flags = CLK_IS_CRITICAL,
        },
 };
 
index e4ca6a45f31397324d4f79378b59036a38218641..ef1b267cb058a4a03f0ead86218ee165653fd737 100644 (file)
@@ -265,7 +265,7 @@ static struct clk_fixed_factor cxo = {
        .div = 1,
        .hw.init = &(struct clk_init_data){
                .name = "cxo",
-               .parent_names = (const char *[]){ "xo_board" },
+               .parent_names = (const char *[]){ "xo-board" },
                .num_parents = 1,
                .ops = &clk_fixed_factor_ops,
        },
index a11f4ba98b05c57d08b211ac933f93fcf7cb4616..55c77e44bb2db3e439fd727d8cffcb5d8a279d3e 100644 (file)
@@ -620,4 +620,22 @@ config RISCV_TIMER
          is accessed via both the SBI and the rdcycle instruction.  This is
          required for all RISC-V systems.
 
+config CSKY_MP_TIMER
+       bool "SMP Timer for the C-SKY platform" if COMPILE_TEST
+       depends on CSKY
+       select TIMER_OF
+       help
+         Say yes here to enable C-SKY SMP timer driver used for C-SKY SMP
+         system.
+         csky,mptimer is not only used in SMP systems; it can also be used in
+         single core systems. It's not an mmio reg; it uses mtcr/mfcr instructions.
+
+config GX6605S_TIMER
+       bool "Gx6605s SOC system timer driver" if COMPILE_TEST
+       depends on CSKY
+       select CLKSRC_MMIO
+       select TIMER_OF
+       help
+         This option enables support for gx6605s SOC's timer.
+
 endmenu
index e33b21d3f9d8b360305e309ba729457e025b25c2..dd913810456886d1bcf5aacb8da7b8c445d20411 100644 (file)
@@ -79,3 +79,5 @@ obj-$(CONFIG_CLKSRC_ST_LPC)           += clksrc_st_lpc.o
 obj-$(CONFIG_X86_NUMACHIP)             += numachip.o
 obj-$(CONFIG_ATCPIT100_TIMER)          += timer-atcpit100.o
 obj-$(CONFIG_RISCV_TIMER)              += riscv_timer.o
+obj-$(CONFIG_CSKY_MP_TIMER)            += timer-mp-csky.o
+obj-$(CONFIG_GX6605S_TIMER)            += timer-gx6605s.o
diff --git a/drivers/clocksource/timer-gx6605s.c b/drivers/clocksource/timer-gx6605s.c
new file mode 100644 (file)
index 0000000..80d0939
--- /dev/null
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/sched_clock.h>
+
+#include "timer-of.h"
+
+#define CLKSRC_OFFSET  0x40
+
+#define TIMER_STATUS   0x00
+#define TIMER_VALUE    0x04
+#define TIMER_CONTRL   0x10
+#define TIMER_CONFIG   0x20
+#define TIMER_DIV      0x24
+#define TIMER_INI      0x28
+
+#define GX6605S_STATUS_CLR     BIT(0)
+#define GX6605S_CONTRL_RST     BIT(0)
+#define GX6605S_CONTRL_START   BIT(1)
+#define GX6605S_CONFIG_EN      BIT(0)
+#define GX6605S_CONFIG_IRQ_EN  BIT(1)
+
+static irqreturn_t gx6605s_timer_interrupt(int irq, void *dev)
+{
+       struct clock_event_device *ce = dev;
+       void __iomem *base = timer_of_base(to_timer_of(ce));
+
+       writel_relaxed(GX6605S_STATUS_CLR, base + TIMER_STATUS);
+
+       ce->event_handler(ce);
+
+       return IRQ_HANDLED;
+}
+
+static int gx6605s_timer_set_oneshot(struct clock_event_device *ce)
+{
+       void __iomem *base = timer_of_base(to_timer_of(ce));
+
+       /* reset and stop counter */
+       writel_relaxed(GX6605S_CONTRL_RST, base + TIMER_CONTRL);
+
+       /* enable with irq and start */
+       writel_relaxed(GX6605S_CONFIG_EN | GX6605S_CONFIG_IRQ_EN,
+                      base + TIMER_CONFIG);
+
+       return 0;
+}
+
+static int gx6605s_timer_set_next_event(unsigned long delta,
+                                       struct clock_event_device *ce)
+{
+       void __iomem *base = timer_of_base(to_timer_of(ce));
+
+       /* use reset to pause timer */
+       writel_relaxed(GX6605S_CONTRL_RST, base + TIMER_CONTRL);
+
+       /* config next timeout value */
+       writel_relaxed(ULONG_MAX - delta, base + TIMER_INI);
+       writel_relaxed(GX6605S_CONTRL_START, base + TIMER_CONTRL);
+
+       return 0;
+}
+
+static int gx6605s_timer_shutdown(struct clock_event_device *ce)
+{
+       void __iomem *base = timer_of_base(to_timer_of(ce));
+
+       writel_relaxed(0, base + TIMER_CONTRL);
+       writel_relaxed(0, base + TIMER_CONFIG);
+
+       return 0;
+}
+
+static struct timer_of to = {
+       .flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK,
+       .clkevt = {
+               .rating                 = 300,
+               .features               = CLOCK_EVT_FEAT_DYNIRQ |
+                                         CLOCK_EVT_FEAT_ONESHOT,
+               .set_state_shutdown     = gx6605s_timer_shutdown,
+               .set_state_oneshot      = gx6605s_timer_set_oneshot,
+               .set_next_event         = gx6605s_timer_set_next_event,
+               .cpumask                = cpu_possible_mask,
+       },
+       .of_irq = {
+               .handler                = gx6605s_timer_interrupt,
+               .flags                  = IRQF_TIMER | IRQF_IRQPOLL,
+       },
+};
+
+static u64 notrace gx6605s_sched_clock_read(void)
+{
+       void __iomem *base;
+
+       base = timer_of_base(&to) + CLKSRC_OFFSET;
+
+       return (u64)readl_relaxed(base + TIMER_VALUE);
+}
+
+static void gx6605s_clkevt_init(void __iomem *base)
+{
+       writel_relaxed(0, base + TIMER_DIV);
+       writel_relaxed(0, base + TIMER_CONFIG);
+
+       clockevents_config_and_register(&to.clkevt, timer_of_rate(&to), 2,
+                                       ULONG_MAX);
+}
+
+static int gx6605s_clksrc_init(void __iomem *base)
+{
+       writel_relaxed(0, base + TIMER_DIV);
+       writel_relaxed(0, base + TIMER_INI);
+
+       writel_relaxed(GX6605S_CONTRL_RST, base + TIMER_CONTRL);
+
+       writel_relaxed(GX6605S_CONFIG_EN, base + TIMER_CONFIG);
+
+       writel_relaxed(GX6605S_CONTRL_START, base + TIMER_CONTRL);
+
+       sched_clock_register(gx6605s_sched_clock_read, 32, timer_of_rate(&to));
+
+       return clocksource_mmio_init(base + TIMER_VALUE, "gx6605s",
+                       timer_of_rate(&to), 200, 32, clocksource_mmio_readl_up);
+}
+
+static int __init gx6605s_timer_init(struct device_node *np)
+{
+       int ret;
+
+       /*
+        * The timer driver is for the nationalchip gx6605s SOC, which has two
+        * identical timers. We use one for clkevt and another for clksrc.
+        *
+        * The timers are mmio mapped, so the mmio address must be given in dts.
+        *
+        * It provides a 32bit countup timer and interrupt will be caused by
+        * count-overflow.
+        * So we need set-next-event by ULONG_MAX - delta in TIMER_INI reg.
+        *
+        * The counter at 0x0  offset is clock event.
+        * The counter at 0x40 offset is clock source.
+        * They are the same in hardware, just used differently by the driver.
+        */
+       ret = timer_of_init(np, &to);
+       if (ret)
+               return ret;
+
+       gx6605s_clkevt_init(timer_of_base(&to));
+
+       return gx6605s_clksrc_init(timer_of_base(&to) + CLKSRC_OFFSET);
+}
+TIMER_OF_DECLARE(csky_gx6605s_timer, "csky,gx6605s-timer", gx6605s_timer_init);
diff --git a/drivers/clocksource/timer-mp-csky.c b/drivers/clocksource/timer-mp-csky.c
new file mode 100644 (file)
index 0000000..a8acc43
--- /dev/null
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/sched_clock.h>
+#include <linux/cpu.h>
+#include <linux/of_irq.h>
+#include <asm/reg_ops.h>
+
+#include "timer-of.h"
+
+#define PTIM_CCVR      "cr<3, 14>"
+#define PTIM_CTLR      "cr<0, 14>"
+#define PTIM_LVR       "cr<6, 14>"
+#define PTIM_TSR       "cr<1, 14>"
+
+static int csky_mptimer_irq;
+
+static int csky_mptimer_set_next_event(unsigned long delta,
+                                      struct clock_event_device *ce)
+{
+       mtcr(PTIM_LVR, delta);
+
+       return 0;
+}
+
+static int csky_mptimer_shutdown(struct clock_event_device *ce)
+{
+       mtcr(PTIM_CTLR, 0);
+
+       return 0;
+}
+
+static int csky_mptimer_oneshot(struct clock_event_device *ce)
+{
+       mtcr(PTIM_CTLR, 1);
+
+       return 0;
+}
+
+static int csky_mptimer_oneshot_stopped(struct clock_event_device *ce)
+{
+       mtcr(PTIM_CTLR, 0);
+
+       return 0;
+}
+
+static DEFINE_PER_CPU(struct timer_of, csky_to) = {
+       .flags                                  = TIMER_OF_CLOCK,
+       .clkevt = {
+               .rating                         = 300,
+               .features                       = CLOCK_EVT_FEAT_PERCPU |
+                                                 CLOCK_EVT_FEAT_ONESHOT,
+               .set_state_shutdown             = csky_mptimer_shutdown,
+               .set_state_oneshot              = csky_mptimer_oneshot,
+               .set_state_oneshot_stopped      = csky_mptimer_oneshot_stopped,
+               .set_next_event                 = csky_mptimer_set_next_event,
+       },
+};
+
+static irqreturn_t csky_timer_interrupt(int irq, void *dev)
+{
+       struct timer_of *to = this_cpu_ptr(&csky_to);
+
+       mtcr(PTIM_TSR, 0);
+
+       to->clkevt.event_handler(&to->clkevt);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * clock event for percpu
+ */
+static int csky_mptimer_starting_cpu(unsigned int cpu)
+{
+       struct timer_of *to = per_cpu_ptr(&csky_to, cpu);
+
+       to->clkevt.cpumask = cpumask_of(cpu);
+
+       clockevents_config_and_register(&to->clkevt, timer_of_rate(to),
+                                       2, ULONG_MAX);
+
+       enable_percpu_irq(csky_mptimer_irq, 0);
+
+       return 0;
+}
+
+static int csky_mptimer_dying_cpu(unsigned int cpu)
+{
+       disable_percpu_irq(csky_mptimer_irq);
+
+       return 0;
+}
+
+/*
+ * clock source
+ */
+static u64 sched_clock_read(void)
+{
+       return (u64)mfcr(PTIM_CCVR);
+}
+
+static u64 clksrc_read(struct clocksource *c)
+{
+       return (u64)mfcr(PTIM_CCVR);
+}
+
+struct clocksource csky_clocksource = {
+       .name   = "csky",
+       .rating = 400,
+       .mask   = CLOCKSOURCE_MASK(32),
+       .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
+       .read   = clksrc_read,
+};
+
+static int __init csky_mptimer_init(struct device_node *np)
+{
+       int ret, cpu, cpu_rollback;
+       struct timer_of *to = NULL;
+
+       /*
+        * Csky_mptimer is designed for C-SKY SMP multi-processors and
+        * every core has its own private irq and regs for clkevt and
+        * clksrc.
+        *
+        * The regs are accessed via the cpu instructions mfcr/mtcr instead
+        * of mmio. So no mmio address is needed in dts, but the clk and
+        * irq number still must be given.
+        *
+        * We use private irq for the mptimer and irq number is the same
+        * for every core. So we use request_percpu_irq() in timer_of_init.
+        */
+       csky_mptimer_irq = irq_of_parse_and_map(np, 0);
+       if (csky_mptimer_irq <= 0)
+               return -EINVAL;
+
+       ret = request_percpu_irq(csky_mptimer_irq, csky_timer_interrupt,
+                                "csky_mp_timer", &csky_to);
+       if (ret)
+               return -EINVAL;
+
+       for_each_possible_cpu(cpu) {
+               to = per_cpu_ptr(&csky_to, cpu);
+               ret = timer_of_init(np, to);
+               if (ret)
+                       goto rollback;
+       }
+
+       clocksource_register_hz(&csky_clocksource, timer_of_rate(to));
+       sched_clock_register(sched_clock_read, 32, timer_of_rate(to));
+
+       ret = cpuhp_setup_state(CPUHP_AP_CSKY_TIMER_STARTING,
+                               "clockevents/csky/timer:starting",
+                               csky_mptimer_starting_cpu,
+                               csky_mptimer_dying_cpu);
+       if (ret)
+               return -EINVAL;
+
+       return 0;
+
+rollback:
+       for_each_possible_cpu(cpu_rollback) {
+               if (cpu_rollback == cpu)
+                       break;
+
+               to = per_cpu_ptr(&csky_to, cpu_rollback);
+               timer_of_cleanup(to);
+       }
+       return -EINVAL;
+}
+TIMER_OF_DECLARE(csky_mptimer, "csky,mptimer", csky_mptimer_init);
index df9467eef32a0e4b67090e56e1a5c0260a0f4037..41c9ccdd20d65658f461991ab4e8bc74d0e6fa4a 100644 (file)
@@ -234,6 +234,7 @@ config EDAC_SKX
        depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
        depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y
        select DMI
+       select ACPI_ADXL if ACPI
        help
          Support for error detection and correction the Intel
          Skylake server Integrated Memory Controllers. If your
index dd209e0dd9abb2ca72c0c2b45a5548088852d5c9..a99ea61dad321dddad4ab28bea15ce593ae0c24f 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/bitmap.h>
 #include <linux/math64.h>
 #include <linux/mod_devicetable.h>
+#include <linux/adxl.h>
 #include <acpi/nfit.h>
 #include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
@@ -35,6 +36,7 @@
 #include "edac_module.h"
 
 #define EDAC_MOD_STR    "skx_edac"
+#define MSG_SIZE       1024
 
 /*
  * Debug macros
 static LIST_HEAD(skx_edac_list);
 
 static u64 skx_tolm, skx_tohm;
+static char *skx_msg;
+static unsigned int nvdimm_count;
+
+enum {
+       INDEX_SOCKET,
+       INDEX_MEMCTRL,
+       INDEX_CHANNEL,
+       INDEX_DIMM,
+       INDEX_MAX
+};
+
+static const char * const component_names[] = {
+       [INDEX_SOCKET]  = "ProcessorSocketId",
+       [INDEX_MEMCTRL] = "MemoryControllerId",
+       [INDEX_CHANNEL] = "ChannelId",
+       [INDEX_DIMM]    = "DimmSlotId",
+};
+
+static int component_indices[ARRAY_SIZE(component_names)];
+static int adxl_component_count;
+static const char * const *adxl_component_names;
+static u64 *adxl_values;
+static char *adxl_msg;
 
 #define NUM_IMC                        2       /* memory controllers per socket */
 #define NUM_CHANNELS           3       /* channels per memory controller */
@@ -393,6 +418,8 @@ static int get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
        u16 flags;
        u64 size = 0;
 
+       nvdimm_count++;
+
        dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc,
                                                   imc->src_id, 0);
 
@@ -941,12 +968,46 @@ static void teardown_skx_debug(void)
 }
 #endif /*CONFIG_EDAC_DEBUG*/
 
+static bool skx_adxl_decode(struct decoded_addr *res)
+
+{
+       int i, len = 0;
+
+       if (res->addr >= skx_tohm || (res->addr >= skx_tolm &&
+                                     res->addr < BIT_ULL(32))) {
+               edac_dbg(0, "Address 0x%llx out of range\n", res->addr);
+               return false;
+       }
+
+       if (adxl_decode(res->addr, adxl_values)) {
+               edac_dbg(0, "Failed to decode 0x%llx\n", res->addr);
+               return false;
+       }
+
+       res->socket  = (int)adxl_values[component_indices[INDEX_SOCKET]];
+       res->imc     = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
+       res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
+       res->dimm    = (int)adxl_values[component_indices[INDEX_DIMM]];
+
+       for (i = 0; i < adxl_component_count; i++) {
+               if (adxl_values[i] == ~0x0ull)
+                       continue;
+
+               len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx",
+                               adxl_component_names[i], adxl_values[i]);
+               if (MSG_SIZE - len <= 0)
+                       break;
+       }
+
+       return true;
+}
+
 static void skx_mce_output_error(struct mem_ctl_info *mci,
                                 const struct mce *m,
                                 struct decoded_addr *res)
 {
        enum hw_event_mc_err_type tp_event;
-       char *type, *optype, msg[256];
+       char *type, *optype;
        bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
        bool overflow = GET_BITFIELD(m->status, 62, 62);
        bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
@@ -1007,22 +1068,47 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
                        break;
                }
        }
+       if (adxl_component_count) {
+               snprintf(skx_msg, MSG_SIZE, "%s%s err_code:%04x:%04x %s",
+                        overflow ? " OVERFLOW" : "",
+                        (uncorrected_error && recoverable) ? " recoverable" : "",
+                        mscod, errcode, adxl_msg);
+       } else {
+               snprintf(skx_msg, MSG_SIZE,
+                        "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
+                        overflow ? " OVERFLOW" : "",
+                        (uncorrected_error && recoverable) ? " recoverable" : "",
+                        mscod, errcode,
+                        res->socket, res->imc, res->rank,
+                        res->bank_group, res->bank_address, res->row, res->column);
+       }
 
-       snprintf(msg, sizeof(msg),
-                "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
-                overflow ? " OVERFLOW" : "",
-                (uncorrected_error && recoverable) ? " recoverable" : "",
-                mscod, errcode,
-                res->socket, res->imc, res->rank,
-                res->bank_group, res->bank_address, res->row, res->column);
-
-       edac_dbg(0, "%s\n", msg);
+       edac_dbg(0, "%s\n", skx_msg);
 
        /* Call the helper to output message */
        edac_mc_handle_error(tp_event, mci, core_err_cnt,
                             m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
                             res->channel, res->dimm, -1,
-                            optype, msg);
+                            optype, skx_msg);
+}
+
+static struct mem_ctl_info *get_mci(int src_id, int lmc)
+{
+       struct skx_dev *d;
+
+       if (lmc > NUM_IMC - 1) {
+               skx_printk(KERN_ERR, "Bad lmc %d\n", lmc);
+               return NULL;
+       }
+
+       list_for_each_entry(d, &skx_edac_list, list) {
+               if (d->imc[0].src_id == src_id)
+                       return d->imc[lmc].mci;
+       }
+
+       skx_printk(KERN_ERR, "No mci for src_id %d lmc %d\n", src_id, lmc);
+
+       return NULL;
 }
 
 static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
@@ -1040,10 +1126,23 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
        if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
                return NOTIFY_DONE;
 
+       memset(&res, 0, sizeof(res));
        res.addr = mce->addr;
-       if (!skx_decode(&res))
+
+       if (adxl_component_count) {
+               if (!skx_adxl_decode(&res))
+                       return NOTIFY_DONE;
+
+               mci = get_mci(res.socket, res.imc);
+       } else {
+               if (!skx_decode(&res))
+                       return NOTIFY_DONE;
+
+               mci = res.dev->imc[res.imc].mci;
+       }
+
+       if (!mci)
                return NOTIFY_DONE;
-       mci = res.dev->imc[res.imc].mci;
 
        if (mce->mcgstatus & MCG_STATUS_MCIP)
                type = "Exception";
@@ -1094,6 +1193,62 @@ static void skx_remove(void)
        }
 }
 
+static void __init skx_adxl_get(void)
+{
+       const char * const *names;
+       int i, j;
+
+       names = adxl_get_component_names();
+       if (!names) {
+               skx_printk(KERN_NOTICE, "No firmware support for address translation.");
+               skx_printk(KERN_CONT, " Only decoding DDR4 address!\n");
+               return;
+       }
+
+       for (i = 0; i < INDEX_MAX; i++) {
+               for (j = 0; names[j]; j++) {
+                       if (!strcmp(component_names[i], names[j])) {
+                               component_indices[i] = j;
+                               break;
+                       }
+               }
+
+               if (!names[j])
+                       goto err;
+       }
+
+       adxl_component_names = names;
+       while (*names++)
+               adxl_component_count++;
+
+       adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values),
+                             GFP_KERNEL);
+       if (!adxl_values) {
+               adxl_component_count = 0;
+               return;
+       }
+
+       adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
+       if (!adxl_msg) {
+               adxl_component_count = 0;
+               kfree(adxl_values);
+       }
+
+       return;
+err:
+       skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ",
+                  component_names[i]);
+       for (j = 0; names[j]; j++)
+               skx_printk(KERN_CONT, "%s ", names[j]);
+       skx_printk(KERN_CONT, "\n");
+}
+
+static void __exit skx_adxl_put(void)
+{
+       kfree(adxl_values);
+       kfree(adxl_msg);
+}
+
 /*
  * skx_init:
  *     make sure we are running on the correct cpu model
@@ -1158,6 +1313,15 @@ static int __init skx_init(void)
                }
        }
 
+       skx_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
+       if (!skx_msg) {
+               rc = -ENOMEM;
+               goto fail;
+       }
+
+       if (nvdimm_count)
+               skx_adxl_get();
+
        /* Ensure that the OPSTATE is set correctly for POLL or NMI */
        opstate_init();
 
@@ -1176,6 +1340,9 @@ static void __exit skx_exit(void)
        edac_dbg(2, "\n");
        mce_unregister_decode_chain(&skx_mce_dec);
        skx_remove();
+       if (nvdimm_count)
+               skx_adxl_put();
+       kfree(skx_msg);
        teardown_skx_debug();
 }
 
index 7670e8dda82951312feee7eca53f7499129c911a..7273e5082b4175be3601e28775506ce8b5ee89d9 100644 (file)
@@ -145,34 +145,6 @@ config EFI_PCDP
          See DIG64_HCDPv20_042804.pdf available from
          <http://www.dig64.org/specifications/> 
 
-config DELL_RBU
-       tristate "BIOS update support for DELL systems via sysfs"
-       depends on X86
-       select FW_LOADER
-       select FW_LOADER_USER_HELPER
-       help
-        Say m if you want to have the option of updating the BIOS for your
-        DELL system. Note you need a Dell OpenManage or Dell Update package (DUP)
-        supporting application to communicate with the BIOS regarding the new
-        image for the image update to take effect.
-        See <file:Documentation/dell_rbu.txt> for more details on the driver.
-
-config DCDBAS
-       tristate "Dell Systems Management Base Driver"
-       depends on X86
-       help
-         The Dell Systems Management Base Driver provides a sysfs interface
-         for systems management software to perform System Management
-         Interrupts (SMIs) and Host Control Actions (system power cycle or
-         power off after OS shutdown) on certain Dell systems.
-
-         See <file:Documentation/dcdbas.txt> for more details on the driver
-         and the Dell systems on which Dell systems management software makes
-         use of this driver.
-
-         Say Y or M here to enable the driver for use by Dell systems
-         management software such as Dell OpenManage.
-
 config DMIID
     bool "Export DMI identification via sysfs to userspace"
     depends on DMI
index 13660a9514379e7534af0f56c6e084f68b2ef02c..3158dffd9914e6afd44a51caf7acfd688c8984b3 100644 (file)
@@ -11,8 +11,6 @@ obj-$(CONFIG_DMI)             += dmi_scan.o
 obj-$(CONFIG_DMI_SYSFS)                += dmi-sysfs.o
 obj-$(CONFIG_EDD)              += edd.o
 obj-$(CONFIG_EFI_PCDP)         += pcdp.o
-obj-$(CONFIG_DELL_RBU)          += dell_rbu.o
-obj-$(CONFIG_DCDBAS)           += dcdbas.o
 obj-$(CONFIG_DMIID)            += dmi-id.o
 obj-$(CONFIG_ISCSI_IBFT_FIND)  += iscsi_ibft_find.o
 obj-$(CONFIG_ISCSI_IBFT)       += iscsi_ibft.o
diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c
deleted file mode 100644 (file)
index 0bdea60..0000000
+++ /dev/null
@@ -1,650 +0,0 @@
-/*
- *  dcdbas.c: Dell Systems Management Base Driver
- *
- *  The Dell Systems Management Base Driver provides a sysfs interface for
- *  systems management software to perform System Management Interrupts (SMIs)
- *  and Host Control Actions (power cycle or power off after OS shutdown) on
- *  Dell systems.
- *
- *  See Documentation/dcdbas.txt for more information.
- *
- *  Copyright (C) 1995-2006 Dell Inc.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License v2.0 as published by
- *  the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- */
-
-#include <linux/platform_device.h>
-#include <linux/dma-mapping.h>
-#include <linux/errno.h>
-#include <linux/cpu.h>
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/mc146818rtc.h>
-#include <linux/module.h>
-#include <linux/reboot.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/mutex.h>
-#include <asm/io.h>
-
-#include "dcdbas.h"
-
-#define DRIVER_NAME            "dcdbas"
-#define DRIVER_VERSION         "5.6.0-3.2"
-#define DRIVER_DESCRIPTION     "Dell Systems Management Base Driver"
-
-static struct platform_device *dcdbas_pdev;
-
-static u8 *smi_data_buf;
-static dma_addr_t smi_data_buf_handle;
-static unsigned long smi_data_buf_size;
-static u32 smi_data_buf_phys_addr;
-static DEFINE_MUTEX(smi_data_lock);
-
-static unsigned int host_control_action;
-static unsigned int host_control_smi_type;
-static unsigned int host_control_on_shutdown;
-
-/**
- * smi_data_buf_free: free SMI data buffer
- */
-static void smi_data_buf_free(void)
-{
-       if (!smi_data_buf)
-               return;
-
-       dev_dbg(&dcdbas_pdev->dev, "%s: phys: %x size: %lu\n",
-               __func__, smi_data_buf_phys_addr, smi_data_buf_size);
-
-       dma_free_coherent(&dcdbas_pdev->dev, smi_data_buf_size, smi_data_buf,
-                         smi_data_buf_handle);
-       smi_data_buf = NULL;
-       smi_data_buf_handle = 0;
-       smi_data_buf_phys_addr = 0;
-       smi_data_buf_size = 0;
-}
-
-/**
- * smi_data_buf_realloc: grow SMI data buffer if needed
- */
-static int smi_data_buf_realloc(unsigned long size)
-{
-       void *buf;
-       dma_addr_t handle;
-
-       if (smi_data_buf_size >= size)
-               return 0;
-
-       if (size > MAX_SMI_DATA_BUF_SIZE)
-               return -EINVAL;
-
-       /* new buffer is needed */
-       buf = dma_alloc_coherent(&dcdbas_pdev->dev, size, &handle, GFP_KERNEL);
-       if (!buf) {
-               dev_dbg(&dcdbas_pdev->dev,
-                       "%s: failed to allocate memory size %lu\n",
-                       __func__, size);
-               return -ENOMEM;
-       }
-       /* memory zeroed by dma_alloc_coherent */
-
-       if (smi_data_buf)
-               memcpy(buf, smi_data_buf, smi_data_buf_size);
-
-       /* free any existing buffer */
-       smi_data_buf_free();
-
-       /* set up new buffer for use */
-       smi_data_buf = buf;
-       smi_data_buf_handle = handle;
-       smi_data_buf_phys_addr = (u32) virt_to_phys(buf);
-       smi_data_buf_size = size;
-
-       dev_dbg(&dcdbas_pdev->dev, "%s: phys: %x size: %lu\n",
-               __func__, smi_data_buf_phys_addr, smi_data_buf_size);
-
-       return 0;
-}
-
-static ssize_t smi_data_buf_phys_addr_show(struct device *dev,
-                                          struct device_attribute *attr,
-                                          char *buf)
-{
-       return sprintf(buf, "%x\n", smi_data_buf_phys_addr);
-}
-
-static ssize_t smi_data_buf_size_show(struct device *dev,
-                                     struct device_attribute *attr,
-                                     char *buf)
-{
-       return sprintf(buf, "%lu\n", smi_data_buf_size);
-}
-
-static ssize_t smi_data_buf_size_store(struct device *dev,
-                                      struct device_attribute *attr,
-                                      const char *buf, size_t count)
-{
-       unsigned long buf_size;
-       ssize_t ret;
-
-       buf_size = simple_strtoul(buf, NULL, 10);
-
-       /* make sure SMI data buffer is at least buf_size */
-       mutex_lock(&smi_data_lock);
-       ret = smi_data_buf_realloc(buf_size);
-       mutex_unlock(&smi_data_lock);
-       if (ret)
-               return ret;
-
-       return count;
-}
-
-static ssize_t smi_data_read(struct file *filp, struct kobject *kobj,
-                            struct bin_attribute *bin_attr,
-                            char *buf, loff_t pos, size_t count)
-{
-       ssize_t ret;
-
-       mutex_lock(&smi_data_lock);
-       ret = memory_read_from_buffer(buf, count, &pos, smi_data_buf,
-                                       smi_data_buf_size);
-       mutex_unlock(&smi_data_lock);
-       return ret;
-}
-
-static ssize_t smi_data_write(struct file *filp, struct kobject *kobj,
-                             struct bin_attribute *bin_attr,
-                             char *buf, loff_t pos, size_t count)
-{
-       ssize_t ret;
-
-       if ((pos + count) > MAX_SMI_DATA_BUF_SIZE)
-               return -EINVAL;
-
-       mutex_lock(&smi_data_lock);
-
-       ret = smi_data_buf_realloc(pos + count);
-       if (ret)
-               goto out;
-
-       memcpy(smi_data_buf + pos, buf, count);
-       ret = count;
-out:
-       mutex_unlock(&smi_data_lock);
-       return ret;
-}
-
-static ssize_t host_control_action_show(struct device *dev,
-                                       struct device_attribute *attr,
-                                       char *buf)
-{
-       return sprintf(buf, "%u\n", host_control_action);
-}
-
-static ssize_t host_control_action_store(struct device *dev,
-                                        struct device_attribute *attr,
-                                        const char *buf, size_t count)
-{
-       ssize_t ret;
-
-       /* make sure buffer is available for host control command */
-       mutex_lock(&smi_data_lock);
-       ret = smi_data_buf_realloc(sizeof(struct apm_cmd));
-       mutex_unlock(&smi_data_lock);
-       if (ret)
-               return ret;
-
-       host_control_action = simple_strtoul(buf, NULL, 10);
-       return count;
-}
-
-static ssize_t host_control_smi_type_show(struct device *dev,
-                                         struct device_attribute *attr,
-                                         char *buf)
-{
-       return sprintf(buf, "%u\n", host_control_smi_type);
-}
-
-static ssize_t host_control_smi_type_store(struct device *dev,
-                                          struct device_attribute *attr,
-                                          const char *buf, size_t count)
-{
-       host_control_smi_type = simple_strtoul(buf, NULL, 10);
-       return count;
-}
-
-static ssize_t host_control_on_shutdown_show(struct device *dev,
-                                            struct device_attribute *attr,
-                                            char *buf)
-{
-       return sprintf(buf, "%u\n", host_control_on_shutdown);
-}
-
-static ssize_t host_control_on_shutdown_store(struct device *dev,
-                                             struct device_attribute *attr,
-                                             const char *buf, size_t count)
-{
-       host_control_on_shutdown = simple_strtoul(buf, NULL, 10);
-       return count;
-}
-
-static int raise_smi(void *par)
-{
-       struct smi_cmd *smi_cmd = par;
-
-       if (smp_processor_id() != 0) {
-               dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n",
-                       __func__);
-               return -EBUSY;
-       }
-
-       /* generate SMI */
-       /* inb to force posted write through and make SMI happen now */
-       asm volatile (
-               "outb %b0,%w1\n"
-               "inb %w1"
-               : /* no output args */
-               : "a" (smi_cmd->command_code),
-                 "d" (smi_cmd->command_address),
-                 "b" (smi_cmd->ebx),
-                 "c" (smi_cmd->ecx)
-               : "memory"
-       );
-
-       return 0;
-}
-/**
- * dcdbas_smi_request: generate SMI request
- *
- * Called with smi_data_lock.
- */
-int dcdbas_smi_request(struct smi_cmd *smi_cmd)
-{
-       int ret;
-
-       if (smi_cmd->magic != SMI_CMD_MAGIC) {
-               dev_info(&dcdbas_pdev->dev, "%s: invalid magic value\n",
-                        __func__);
-               return -EBADR;
-       }
-
-       /* SMI requires CPU 0 */
-       get_online_cpus();
-       ret = smp_call_on_cpu(0, raise_smi, smi_cmd, true);
-       put_online_cpus();
-
-       return ret;
-}
-
-/**
- * smi_request_store:
- *
- * The valid values are:
- * 0: zero SMI data buffer
- * 1: generate calling interface SMI
- * 2: generate raw SMI
- *
- * User application writes smi_cmd to smi_data before telling driver
- * to generate SMI.
- */
-static ssize_t smi_request_store(struct device *dev,
-                                struct device_attribute *attr,
-                                const char *buf, size_t count)
-{
-       struct smi_cmd *smi_cmd;
-       unsigned long val = simple_strtoul(buf, NULL, 10);
-       ssize_t ret;
-
-       mutex_lock(&smi_data_lock);
-
-       if (smi_data_buf_size < sizeof(struct smi_cmd)) {
-               ret = -ENODEV;
-               goto out;
-       }
-       smi_cmd = (struct smi_cmd *)smi_data_buf;
-
-       switch (val) {
-       case 2:
-               /* Raw SMI */
-               ret = dcdbas_smi_request(smi_cmd);
-               if (!ret)
-                       ret = count;
-               break;
-       case 1:
-               /* Calling Interface SMI */
-               smi_cmd->ebx = (u32) virt_to_phys(smi_cmd->command_buffer);
-               ret = dcdbas_smi_request(smi_cmd);
-               if (!ret)
-                       ret = count;
-               break;
-       case 0:
-               memset(smi_data_buf, 0, smi_data_buf_size);
-               ret = count;
-               break;
-       default:
-               ret = -EINVAL;
-               break;
-       }
-
-out:
-       mutex_unlock(&smi_data_lock);
-       return ret;
-}
-EXPORT_SYMBOL(dcdbas_smi_request);
-
-/**
- * host_control_smi: generate host control SMI
- *
- * Caller must set up the host control command in smi_data_buf.
- */
-static int host_control_smi(void)
-{
-       struct apm_cmd *apm_cmd;
-       u8 *data;
-       unsigned long flags;
-       u32 num_ticks;
-       s8 cmd_status;
-       u8 index;
-
-       apm_cmd = (struct apm_cmd *)smi_data_buf;
-       apm_cmd->status = ESM_STATUS_CMD_UNSUCCESSFUL;
-
-       switch (host_control_smi_type) {
-       case HC_SMITYPE_TYPE1:
-               spin_lock_irqsave(&rtc_lock, flags);
-               /* write SMI data buffer physical address */
-               data = (u8 *)&smi_data_buf_phys_addr;
-               for (index = PE1300_CMOS_CMD_STRUCT_PTR;
-                    index < (PE1300_CMOS_CMD_STRUCT_PTR + 4);
-                    index++, data++) {
-                       outb(index,
-                            (CMOS_BASE_PORT + CMOS_PAGE2_INDEX_PORT_PIIX4));
-                       outb(*data,
-                            (CMOS_BASE_PORT + CMOS_PAGE2_DATA_PORT_PIIX4));
-               }
-
-               /* first set status to -1 as called by spec */
-               cmd_status = ESM_STATUS_CMD_UNSUCCESSFUL;
-               outb((u8) cmd_status, PCAT_APM_STATUS_PORT);
-
-               /* generate SMM call */
-               outb(ESM_APM_CMD, PCAT_APM_CONTROL_PORT);
-               spin_unlock_irqrestore(&rtc_lock, flags);
-
-               /* wait a few to see if it executed */
-               num_ticks = TIMEOUT_USEC_SHORT_SEMA_BLOCKING;
-               while ((cmd_status = inb(PCAT_APM_STATUS_PORT))
-                      == ESM_STATUS_CMD_UNSUCCESSFUL) {
-                       num_ticks--;
-                       if (num_ticks == EXPIRED_TIMER)
-                               return -ETIME;
-               }
-               break;
-
-       case HC_SMITYPE_TYPE2:
-       case HC_SMITYPE_TYPE3:
-               spin_lock_irqsave(&rtc_lock, flags);
-               /* write SMI data buffer physical address */
-               data = (u8 *)&smi_data_buf_phys_addr;
-               for (index = PE1400_CMOS_CMD_STRUCT_PTR;
-                    index < (PE1400_CMOS_CMD_STRUCT_PTR + 4);
-                    index++, data++) {
-                       outb(index, (CMOS_BASE_PORT + CMOS_PAGE1_INDEX_PORT));
-                       outb(*data, (CMOS_BASE_PORT + CMOS_PAGE1_DATA_PORT));
-               }
-
-               /* generate SMM call */
-               if (host_control_smi_type == HC_SMITYPE_TYPE3)
-                       outb(ESM_APM_CMD, PCAT_APM_CONTROL_PORT);
-               else
-                       outb(ESM_APM_CMD, PE1400_APM_CONTROL_PORT);
-
-               /* restore RTC index pointer since it was written to above */
-               CMOS_READ(RTC_REG_C);
-               spin_unlock_irqrestore(&rtc_lock, flags);
-
-               /* read control port back to serialize write */
-               cmd_status = inb(PE1400_APM_CONTROL_PORT);
-
-               /* wait a few to see if it executed */
-               num_ticks = TIMEOUT_USEC_SHORT_SEMA_BLOCKING;
-               while (apm_cmd->status == ESM_STATUS_CMD_UNSUCCESSFUL) {
-                       num_ticks--;
-                       if (num_ticks == EXPIRED_TIMER)
-                               return -ETIME;
-               }
-               break;
-
-       default:
-               dev_dbg(&dcdbas_pdev->dev, "%s: invalid SMI type %u\n",
-                       __func__, host_control_smi_type);
-               return -ENOSYS;
-       }
-
-       return 0;
-}
-
-/**
- * dcdbas_host_control: initiate host control
- *
- * This function is called by the driver after the system has
- * finished shutting down if the user application specified a
- * host control action to perform on shutdown.  It is safe to
- * use smi_data_buf at this point because the system has finished
- * shutting down and no userspace apps are running.
- */
-static void dcdbas_host_control(void)
-{
-       struct apm_cmd *apm_cmd;
-       u8 action;
-
-       if (host_control_action == HC_ACTION_NONE)
-               return;
-
-       action = host_control_action;
-       host_control_action = HC_ACTION_NONE;
-
-       if (!smi_data_buf) {
-               dev_dbg(&dcdbas_pdev->dev, "%s: no SMI buffer\n", __func__);
-               return;
-       }
-
-       if (smi_data_buf_size < sizeof(struct apm_cmd)) {
-               dev_dbg(&dcdbas_pdev->dev, "%s: SMI buffer too small\n",
-                       __func__);
-               return;
-       }
-
-       apm_cmd = (struct apm_cmd *)smi_data_buf;
-
-       /* power off takes precedence */
-       if (action & HC_ACTION_HOST_CONTROL_POWEROFF) {
-               apm_cmd->command = ESM_APM_POWER_CYCLE;
-               apm_cmd->reserved = 0;
-               *((s16 *)&apm_cmd->parameters.shortreq.parm[0]) = (s16) 0;
-               host_control_smi();
-       } else if (action & HC_ACTION_HOST_CONTROL_POWERCYCLE) {
-               apm_cmd->command = ESM_APM_POWER_CYCLE;
-               apm_cmd->reserved = 0;
-               *((s16 *)&apm_cmd->parameters.shortreq.parm[0]) = (s16) 20;
-               host_control_smi();
-       }
-}
-
-/**
- * dcdbas_reboot_notify: handle reboot notification for host control
- */
-static int dcdbas_reboot_notify(struct notifier_block *nb, unsigned long code,
-                               void *unused)
-{
-       switch (code) {
-       case SYS_DOWN:
-       case SYS_HALT:
-       case SYS_POWER_OFF:
-               if (host_control_on_shutdown) {
-                       /* firmware is going to perform host control action */
-                       printk(KERN_WARNING "Please wait for shutdown "
-                              "action to complete...\n");
-                       dcdbas_host_control();
-               }
-               break;
-       }
-
-       return NOTIFY_DONE;
-}
-
-static struct notifier_block dcdbas_reboot_nb = {
-       .notifier_call = dcdbas_reboot_notify,
-       .next = NULL,
-       .priority = INT_MIN
-};
-
-static DCDBAS_BIN_ATTR_RW(smi_data);
-
-static struct bin_attribute *dcdbas_bin_attrs[] = {
-       &bin_attr_smi_data,
-       NULL
-};
-
-static DCDBAS_DEV_ATTR_RW(smi_data_buf_size);
-static DCDBAS_DEV_ATTR_RO(smi_data_buf_phys_addr);
-static DCDBAS_DEV_ATTR_WO(smi_request);
-static DCDBAS_DEV_ATTR_RW(host_control_action);
-static DCDBAS_DEV_ATTR_RW(host_control_smi_type);
-static DCDBAS_DEV_ATTR_RW(host_control_on_shutdown);
-
-static struct attribute *dcdbas_dev_attrs[] = {
-       &dev_attr_smi_data_buf_size.attr,
-       &dev_attr_smi_data_buf_phys_addr.attr,
-       &dev_attr_smi_request.attr,
-       &dev_attr_host_control_action.attr,
-       &dev_attr_host_control_smi_type.attr,
-       &dev_attr_host_control_on_shutdown.attr,
-       NULL
-};
-
-static const struct attribute_group dcdbas_attr_group = {
-       .attrs = dcdbas_dev_attrs,
-       .bin_attrs = dcdbas_bin_attrs,
-};
-
-static int dcdbas_probe(struct platform_device *dev)
-{
-       int error;
-
-       host_control_action = HC_ACTION_NONE;
-       host_control_smi_type = HC_SMITYPE_NONE;
-
-       dcdbas_pdev = dev;
-
-       /*
-        * BIOS SMI calls require buffer addresses be in 32-bit address space.
-        * This is done by setting the DMA mask below.
-        */
-       error = dma_set_coherent_mask(&dcdbas_pdev->dev, DMA_BIT_MASK(32));
-       if (error)
-               return error;
-
-       error = sysfs_create_group(&dev->dev.kobj, &dcdbas_attr_group);
-       if (error)
-               return error;
-
-       register_reboot_notifier(&dcdbas_reboot_nb);
-
-       dev_info(&dev->dev, "%s (version %s)\n",
-                DRIVER_DESCRIPTION, DRIVER_VERSION);
-
-       return 0;
-}
-
-static int dcdbas_remove(struct platform_device *dev)
-{
-       unregister_reboot_notifier(&dcdbas_reboot_nb);
-       sysfs_remove_group(&dev->dev.kobj, &dcdbas_attr_group);
-
-       return 0;
-}
-
-static struct platform_driver dcdbas_driver = {
-       .driver         = {
-               .name   = DRIVER_NAME,
-       },
-       .probe          = dcdbas_probe,
-       .remove         = dcdbas_remove,
-};
-
-static const struct platform_device_info dcdbas_dev_info __initconst = {
-       .name           = DRIVER_NAME,
-       .id             = -1,
-       .dma_mask       = DMA_BIT_MASK(32),
-};
-
-static struct platform_device *dcdbas_pdev_reg;
-
-/**
- * dcdbas_init: initialize driver
- */
-static int __init dcdbas_init(void)
-{
-       int error;
-
-       error = platform_driver_register(&dcdbas_driver);
-       if (error)
-               return error;
-
-       dcdbas_pdev_reg = platform_device_register_full(&dcdbas_dev_info);
-       if (IS_ERR(dcdbas_pdev_reg)) {
-               error = PTR_ERR(dcdbas_pdev_reg);
-               goto err_unregister_driver;
-       }
-
-       return 0;
-
- err_unregister_driver:
-       platform_driver_unregister(&dcdbas_driver);
-       return error;
-}
-
-/**
- * dcdbas_exit: perform driver cleanup
- */
-static void __exit dcdbas_exit(void)
-{
-       /*
-        * make sure functions that use dcdbas_pdev are called
-        * before platform_device_unregister
-        */
-       unregister_reboot_notifier(&dcdbas_reboot_nb);
-
-       /*
-        * We have to free the buffer here instead of dcdbas_remove
-        * because only in module exit function we can be sure that
-        * all sysfs attributes belonging to this module have been
-        * released.
-        */
-       if (dcdbas_pdev)
-               smi_data_buf_free();
-       platform_device_unregister(dcdbas_pdev_reg);
-       platform_driver_unregister(&dcdbas_driver);
-}
-
-subsys_initcall_sync(dcdbas_init);
-module_exit(dcdbas_exit);
-
-MODULE_DESCRIPTION(DRIVER_DESCRIPTION " (version " DRIVER_VERSION ")");
-MODULE_VERSION(DRIVER_VERSION);
-MODULE_AUTHOR("Dell Inc.");
-MODULE_LICENSE("GPL");
-/* Any System or BIOS claiming to be by Dell */
-MODULE_ALIAS("dmi:*:[bs]vnD[Ee][Ll][Ll]*:*");
diff --git a/drivers/firmware/dcdbas.h b/drivers/firmware/dcdbas.h
deleted file mode 100644 (file)
index ca3cb0a..0000000
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- *  dcdbas.h: Definitions for Dell Systems Management Base driver
- *
- *  Copyright (C) 1995-2005 Dell Inc.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License v2.0 as published by
- *  the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- */
-
-#ifndef _DCDBAS_H_
-#define _DCDBAS_H_
-
-#include <linux/device.h>
-#include <linux/sysfs.h>
-#include <linux/types.h>
-
-#define MAX_SMI_DATA_BUF_SIZE                  (256 * 1024)
-
-#define HC_ACTION_NONE                         (0)
-#define HC_ACTION_HOST_CONTROL_POWEROFF                BIT(1)
-#define HC_ACTION_HOST_CONTROL_POWERCYCLE      BIT(2)
-
-#define HC_SMITYPE_NONE                                (0)
-#define HC_SMITYPE_TYPE1                       (1)
-#define HC_SMITYPE_TYPE2                       (2)
-#define HC_SMITYPE_TYPE3                       (3)
-
-#define ESM_APM_CMD                            (0x0A0)
-#define ESM_APM_POWER_CYCLE                    (0x10)
-#define ESM_STATUS_CMD_UNSUCCESSFUL            (-1)
-
-#define CMOS_BASE_PORT                         (0x070)
-#define CMOS_PAGE1_INDEX_PORT                  (0)
-#define CMOS_PAGE1_DATA_PORT                   (1)
-#define CMOS_PAGE2_INDEX_PORT_PIIX4            (2)
-#define CMOS_PAGE2_DATA_PORT_PIIX4             (3)
-#define PE1400_APM_CONTROL_PORT                        (0x0B0)
-#define PCAT_APM_CONTROL_PORT                  (0x0B2)
-#define PCAT_APM_STATUS_PORT                   (0x0B3)
-#define PE1300_CMOS_CMD_STRUCT_PTR             (0x38)
-#define PE1400_CMOS_CMD_STRUCT_PTR             (0x70)
-
-#define MAX_SYSMGMT_SHORTCMD_PARMBUF_LEN       (14)
-#define MAX_SYSMGMT_LONGCMD_SGENTRY_NUM                (16)
-
-#define TIMEOUT_USEC_SHORT_SEMA_BLOCKING       (10000)
-#define EXPIRED_TIMER                          (0)
-
-#define SMI_CMD_MAGIC                          (0x534D4931)
-
-#define DCDBAS_DEV_ATTR_RW(_name) \
-       DEVICE_ATTR(_name,0600,_name##_show,_name##_store);
-
-#define DCDBAS_DEV_ATTR_RO(_name) \
-       DEVICE_ATTR(_name,0400,_name##_show,NULL);
-
-#define DCDBAS_DEV_ATTR_WO(_name) \
-       DEVICE_ATTR(_name,0200,NULL,_name##_store);
-
-#define DCDBAS_BIN_ATTR_RW(_name) \
-struct bin_attribute bin_attr_##_name = { \
-       .attr =  { .name = __stringify(_name), \
-                  .mode = 0600 }, \
-       .read =  _name##_read, \
-       .write = _name##_write, \
-}
-
-struct smi_cmd {
-       __u32 magic;
-       __u32 ebx;
-       __u32 ecx;
-       __u16 command_address;
-       __u8 command_code;
-       __u8 reserved;
-       __u8 command_buffer[1];
-} __attribute__ ((packed));
-
-struct apm_cmd {
-       __u8 command;
-       __s8 status;
-       __u16 reserved;
-       union {
-               struct {
-                       __u8 parm[MAX_SYSMGMT_SHORTCMD_PARMBUF_LEN];
-               } __attribute__ ((packed)) shortreq;
-
-               struct {
-                       __u16 num_sg_entries;
-                       struct {
-                               __u32 size;
-                               __u64 addr;
-                       } __attribute__ ((packed))
-                           sglist[MAX_SYSMGMT_LONGCMD_SGENTRY_NUM];
-               } __attribute__ ((packed)) longreq;
-       } __attribute__ ((packed)) parameters;
-} __attribute__ ((packed));
-
-int dcdbas_smi_request(struct smi_cmd *smi_cmd);
-
-#endif /* _DCDBAS_H_ */
-
diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c
deleted file mode 100644 (file)
index fb8af5c..0000000
+++ /dev/null
@@ -1,745 +0,0 @@
-/*
- * dell_rbu.c
- * Bios Update driver for Dell systems
- * Author: Dell Inc
- *         Abhay Salunke <abhay_salunke@dell.com>
- *
- * Copyright (C) 2005 Dell Inc.
- *
- * Remote BIOS Update (rbu) driver is used for updating DELL BIOS by
- * creating entries in the /sys file systems on Linux 2.6 and higher
- * kernels. The driver supports two mechanism to update the BIOS namely
- * contiguous and packetized. Both these methods still require having some
- * application to set the CMOS bit indicating the BIOS to update itself
- * after a reboot.
- *
- * Contiguous method:
- * This driver writes the incoming data in a monolithic image by allocating
- * contiguous physical pages large enough to accommodate the incoming BIOS
- * image size.
- *
- * Packetized method:
- * The driver writes the incoming packet image by allocating a new packet
- * on every time the packet data is written. This driver requires an
- * application to break the BIOS image in to fixed sized packet chunks.
- *
- * See Documentation/dell_rbu.txt for more info.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License v2.0 as published by
- * the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/blkdev.h>
-#include <linux/platform_device.h>
-#include <linux/spinlock.h>
-#include <linux/moduleparam.h>
-#include <linux/firmware.h>
-#include <linux/dma-mapping.h>
-
-MODULE_AUTHOR("Abhay Salunke <abhay_salunke@dell.com>");
-MODULE_DESCRIPTION("Driver for updating BIOS image on DELL systems");
-MODULE_LICENSE("GPL");
-MODULE_VERSION("3.2");
-
-#define BIOS_SCAN_LIMIT 0xffffffff
-#define MAX_IMAGE_LENGTH 16
-static struct _rbu_data {
-       void *image_update_buffer;
-       unsigned long image_update_buffer_size;
-       unsigned long bios_image_size;
-       int image_update_ordernum;
-       int dma_alloc;
-       spinlock_t lock;
-       unsigned long packet_read_count;
-       unsigned long num_packets;
-       unsigned long packetsize;
-       unsigned long imagesize;
-       int entry_created;
-} rbu_data;
-
-static char image_type[MAX_IMAGE_LENGTH + 1] = "mono";
-module_param_string(image_type, image_type, sizeof (image_type), 0);
-MODULE_PARM_DESC(image_type,
-       "BIOS image type. choose- mono or packet or init");
-
-static unsigned long allocation_floor = 0x100000;
-module_param(allocation_floor, ulong, 0644);
-MODULE_PARM_DESC(allocation_floor,
-    "Minimum address for allocations when using Packet mode");
-
-struct packet_data {
-       struct list_head list;
-       size_t length;
-       void *data;
-       int ordernum;
-};
-
-static struct packet_data packet_data_head;
-
-static struct platform_device *rbu_device;
-static int context;
-static dma_addr_t dell_rbu_dmaaddr;
-
-static void init_packet_head(void)
-{
-       INIT_LIST_HEAD(&packet_data_head.list);
-       rbu_data.packet_read_count = 0;
-       rbu_data.num_packets = 0;
-       rbu_data.packetsize = 0;
-       rbu_data.imagesize = 0;
-}
-
-static int create_packet(void *data, size_t length)
-{
-       struct packet_data *newpacket;
-       int ordernum = 0;
-       int retval = 0;
-       unsigned int packet_array_size = 0;
-       void **invalid_addr_packet_array = NULL;
-       void *packet_data_temp_buf = NULL;
-       unsigned int idx = 0;
-
-       pr_debug("create_packet: entry \n");
-
-       if (!rbu_data.packetsize) {
-               pr_debug("create_packet: packetsize not specified\n");
-               retval = -EINVAL;
-               goto out_noalloc;
-       }
-
-       spin_unlock(&rbu_data.lock);
-
-       newpacket = kzalloc(sizeof (struct packet_data), GFP_KERNEL);
-
-       if (!newpacket) {
-               printk(KERN_WARNING
-                       "dell_rbu:%s: failed to allocate new "
-                       "packet\n", __func__);
-               retval = -ENOMEM;
-               spin_lock(&rbu_data.lock);
-               goto out_noalloc;
-       }
-
-       ordernum = get_order(length);
-
-       /*
-        * BIOS errata mean we cannot allocate packets below 1MB or they will
-        * be overwritten by BIOS.
-        *
-        * array to temporarily hold packets
-        * that are below the allocation floor
-        *
-        * NOTE: very simplistic because we only need the floor to be at 1MB
-        *       due to BIOS errata. This shouldn't be used for higher floors
-        *       or you will run out of mem trying to allocate the array.
-        */
-       packet_array_size = max(
-                       (unsigned int)(allocation_floor / rbu_data.packetsize),
-                       (unsigned int)1);
-       invalid_addr_packet_array = kcalloc(packet_array_size, sizeof(void *),
-                                               GFP_KERNEL);
-
-       if (!invalid_addr_packet_array) {
-               printk(KERN_WARNING
-                       "dell_rbu:%s: failed to allocate "
-                       "invalid_addr_packet_array \n",
-                       __func__);
-               retval = -ENOMEM;
-               spin_lock(&rbu_data.lock);
-               goto out_alloc_packet;
-       }
-
-       while (!packet_data_temp_buf) {
-               packet_data_temp_buf = (unsigned char *)
-                       __get_free_pages(GFP_KERNEL, ordernum);
-               if (!packet_data_temp_buf) {
-                       printk(KERN_WARNING
-                               "dell_rbu:%s: failed to allocate new "
-                               "packet\n", __func__);
-                       retval = -ENOMEM;
-                       spin_lock(&rbu_data.lock);
-                       goto out_alloc_packet_array;
-               }
-
-               if ((unsigned long)virt_to_phys(packet_data_temp_buf)
-                               < allocation_floor) {
-                       pr_debug("packet 0x%lx below floor at 0x%lx.\n",
-                                       (unsigned long)virt_to_phys(
-                                               packet_data_temp_buf),
-                                       allocation_floor);
-                       invalid_addr_packet_array[idx++] = packet_data_temp_buf;
-                       packet_data_temp_buf = NULL;
-               }
-       }
-       spin_lock(&rbu_data.lock);
-
-       newpacket->data = packet_data_temp_buf;
-
-       pr_debug("create_packet: newpacket at physical addr %lx\n",
-               (unsigned long)virt_to_phys(newpacket->data));
-
-       /* packets may not have fixed size */
-       newpacket->length = length;
-       newpacket->ordernum = ordernum;
-       ++rbu_data.num_packets;
-
-       /* initialize the newly created packet headers */
-       INIT_LIST_HEAD(&newpacket->list);
-       list_add_tail(&newpacket->list, &packet_data_head.list);
-
-       memcpy(newpacket->data, data, length);
-
-       pr_debug("create_packet: exit \n");
-
-out_alloc_packet_array:
-       /* always free packet array */
-       for (;idx>0;idx--) {
-               pr_debug("freeing unused packet below floor 0x%lx.\n",
-                       (unsigned long)virt_to_phys(
-                               invalid_addr_packet_array[idx-1]));
-               free_pages((unsigned long)invalid_addr_packet_array[idx-1],
-                       ordernum);
-       }
-       kfree(invalid_addr_packet_array);
-
-out_alloc_packet:
-       /* if error, free data */
-       if (retval)
-               kfree(newpacket);
-
-out_noalloc:
-       return retval;
-}
-
-static int packetize_data(const u8 *data, size_t length)
-{
-       int rc = 0;
-       int done = 0;
-       int packet_length;
-       u8 *temp;
-       u8 *end = (u8 *) data + length;
-       pr_debug("packetize_data: data length %zd\n", length);
-       if (!rbu_data.packetsize) {
-               printk(KERN_WARNING
-                       "dell_rbu: packetsize not specified\n");
-               return -EIO;
-       }
-
-       temp = (u8 *) data;
-
-       /* packetize the hunk */
-       while (!done) {
-               if ((temp + rbu_data.packetsize) < end)
-                       packet_length = rbu_data.packetsize;
-               else {
-                       /* this is the last packet */
-                       packet_length = end - temp;
-                       done = 1;
-               }
-
-               if ((rc = create_packet(temp, packet_length)))
-                       return rc;
-
-               pr_debug("%p:%td\n", temp, (end - temp));
-               temp += packet_length;
-       }
-
-       rbu_data.imagesize = length;
-
-       return rc;
-}
-
-static int do_packet_read(char *data, struct list_head *ptemp_list,
-       int length, int bytes_read, int *list_read_count)
-{
-       void *ptemp_buf;
-       struct packet_data *newpacket = NULL;
-       int bytes_copied = 0;
-       int j = 0;
-
-       newpacket = list_entry(ptemp_list, struct packet_data, list);
-       *list_read_count += newpacket->length;
-
-       if (*list_read_count > bytes_read) {
-               /* point to the start of unread data */
-               j = newpacket->length - (*list_read_count - bytes_read);
-               /* point to the offset in the packet buffer */
-               ptemp_buf = (u8 *) newpacket->data + j;
-               /*
-                * check if there is enough room in
-                * * the incoming buffer
-                */
-               if (length > (*list_read_count - bytes_read))
-                       /*
-                        * copy what ever is there in this
-                        * packet and move on
-                        */
-                       bytes_copied = (*list_read_count - bytes_read);
-               else
-                       /* copy the remaining */
-                       bytes_copied = length;
-               memcpy(data, ptemp_buf, bytes_copied);
-       }
-       return bytes_copied;
-}
-
-static int packet_read_list(char *data, size_t * pread_length)
-{
-       struct list_head *ptemp_list;
-       int temp_count = 0;
-       int bytes_copied = 0;
-       int bytes_read = 0;
-       int remaining_bytes = 0;
-       char *pdest = data;
-
-       /* check if we have any packets */
-       if (0 == rbu_data.num_packets)
-               return -ENOMEM;
-
-       remaining_bytes = *pread_length;
-       bytes_read = rbu_data.packet_read_count;
-
-       ptemp_list = (&packet_data_head.list)->next;
-       while (!list_empty(ptemp_list)) {
-               bytes_copied = do_packet_read(pdest, ptemp_list,
-                       remaining_bytes, bytes_read, &temp_count);
-               remaining_bytes -= bytes_copied;
-               bytes_read += bytes_copied;
-               pdest += bytes_copied;
-               /*
-                * check if we reached end of buffer before reaching the
-                * last packet
-                */
-               if (remaining_bytes == 0)
-                       break;
-
-               ptemp_list = ptemp_list->next;
-       }
-       /*finally set the bytes read */
-       *pread_length = bytes_read - rbu_data.packet_read_count;
-       rbu_data.packet_read_count = bytes_read;
-       return 0;
-}
-
-static void packet_empty_list(void)
-{
-       struct list_head *ptemp_list;
-       struct list_head *pnext_list;
-       struct packet_data *newpacket;
-
-       ptemp_list = (&packet_data_head.list)->next;
-       while (!list_empty(ptemp_list)) {
-               newpacket =
-                       list_entry(ptemp_list, struct packet_data, list);
-               pnext_list = ptemp_list->next;
-               list_del(ptemp_list);
-               ptemp_list = pnext_list;
-               /*
-                * zero out the RBU packet memory before freeing
-                * to make sure there are no stale RBU packets left in memory
-                */
-               memset(newpacket->data, 0, rbu_data.packetsize);
-               free_pages((unsigned long) newpacket->data,
-                       newpacket->ordernum);
-               kfree(newpacket);
-       }
-       rbu_data.packet_read_count = 0;
-       rbu_data.num_packets = 0;
-       rbu_data.imagesize = 0;
-}
-
-/*
- * img_update_free: Frees the buffer allocated for storing BIOS image
- * Always called with lock held and returned with lock held
- */
-static void img_update_free(void)
-{
-       if (!rbu_data.image_update_buffer)
-               return;
-       /*
-        * zero out this buffer before freeing it to get rid of any stale
-        * BIOS image copied in memory.
-        */
-       memset(rbu_data.image_update_buffer, 0,
-               rbu_data.image_update_buffer_size);
-       if (rbu_data.dma_alloc == 1)
-               dma_free_coherent(NULL, rbu_data.bios_image_size,
-                       rbu_data.image_update_buffer, dell_rbu_dmaaddr);
-       else
-               free_pages((unsigned long) rbu_data.image_update_buffer,
-                       rbu_data.image_update_ordernum);
-
-       /*
-        * Re-initialize the rbu_data variables after a free
-        */
-       rbu_data.image_update_ordernum = -1;
-       rbu_data.image_update_buffer = NULL;
-       rbu_data.image_update_buffer_size = 0;
-       rbu_data.bios_image_size = 0;
-       rbu_data.dma_alloc = 0;
-}
-
-/*
- * img_update_realloc: This function allocates the contiguous pages to
- * accommodate the requested size of data. The memory address and size
- * values are stored globally and on every call to this function the new
- * size is checked to see if more data is required than the existing size.
- * If true the previous memory is freed and new allocation is done to
- * accommodate the new size. If the incoming size is less then than the
- * already allocated size, then that memory is reused. This function is
- * called with lock held and returns with lock held.
- */
-static int img_update_realloc(unsigned long size)
-{
-       unsigned char *image_update_buffer = NULL;
-       unsigned long rc;
-       unsigned long img_buf_phys_addr;
-       int ordernum;
-       int dma_alloc = 0;
-
-       /*
-        * check if the buffer of sufficient size has been
-        * already allocated
-        */
-       if (rbu_data.image_update_buffer_size >= size) {
-               /*
-                * check for corruption
-                */
-               if ((size != 0) && (rbu_data.image_update_buffer == NULL)) {
-                       printk(KERN_ERR "dell_rbu:%s: corruption "
-                               "check failed\n", __func__);
-                       return -EINVAL;
-               }
-               /*
-                * we have a valid pre-allocated buffer with
-                * sufficient size
-                */
-               return 0;
-       }
-
-       /*
-        * free any previously allocated buffer
-        */
-       img_update_free();
-
-       spin_unlock(&rbu_data.lock);
-
-       ordernum = get_order(size);
-       image_update_buffer =
-               (unsigned char *) __get_free_pages(GFP_KERNEL, ordernum);
-
-       img_buf_phys_addr =
-               (unsigned long) virt_to_phys(image_update_buffer);
-
-       if (img_buf_phys_addr > BIOS_SCAN_LIMIT) {
-               free_pages((unsigned long) image_update_buffer, ordernum);
-               ordernum = -1;
-               image_update_buffer = dma_alloc_coherent(NULL, size,
-                       &dell_rbu_dmaaddr, GFP_KERNEL);
-               dma_alloc = 1;
-       }
-
-       spin_lock(&rbu_data.lock);
-
-       if (image_update_buffer != NULL) {
-               rbu_data.image_update_buffer = image_update_buffer;
-               rbu_data.image_update_buffer_size = size;
-               rbu_data.bios_image_size =
-                       rbu_data.image_update_buffer_size;
-               rbu_data.image_update_ordernum = ordernum;
-               rbu_data.dma_alloc = dma_alloc;
-               rc = 0;
-       } else {
-               pr_debug("Not enough memory for image update:"
-                       "size = %ld\n", size);
-               rc = -ENOMEM;
-       }
-
-       return rc;
-}
-
-static ssize_t read_packet_data(char *buffer, loff_t pos, size_t count)
-{
-       int retval;
-       size_t bytes_left;
-       size_t data_length;
-       char *ptempBuf = buffer;
-
-       /* check to see if we have something to return */
-       if (rbu_data.num_packets == 0) {
-               pr_debug("read_packet_data: no packets written\n");
-               retval = -ENOMEM;
-               goto read_rbu_data_exit;
-       }
-
-       if (pos > rbu_data.imagesize) {
-               retval = 0;
-               printk(KERN_WARNING "dell_rbu:read_packet_data: "
-                       "data underrun\n");
-               goto read_rbu_data_exit;
-       }
-
-       bytes_left = rbu_data.imagesize - pos;
-       data_length = min(bytes_left, count);
-
-       if ((retval = packet_read_list(ptempBuf, &data_length)) < 0)
-               goto read_rbu_data_exit;
-
-       if ((pos + count) > rbu_data.imagesize) {
-               rbu_data.packet_read_count = 0;
-               /* this was the last copy */
-               retval = bytes_left;
-       } else
-               retval = count;
-
-      read_rbu_data_exit:
-       return retval;
-}
-
-static ssize_t read_rbu_mono_data(char *buffer, loff_t pos, size_t count)
-{
-       /* check to see if we have something to return */
-       if ((rbu_data.image_update_buffer == NULL) ||
-               (rbu_data.bios_image_size == 0)) {
-               pr_debug("read_rbu_data_mono: image_update_buffer %p ,"
-                       "bios_image_size %lu\n",
-                       rbu_data.image_update_buffer,
-                       rbu_data.bios_image_size);
-               return -ENOMEM;
-       }
-
-       return memory_read_from_buffer(buffer, count, &pos,
-                       rbu_data.image_update_buffer, rbu_data.bios_image_size);
-}
-
-static ssize_t read_rbu_data(struct file *filp, struct kobject *kobj,
-                            struct bin_attribute *bin_attr,
-                            char *buffer, loff_t pos, size_t count)
-{
-       ssize_t ret_count = 0;
-
-       spin_lock(&rbu_data.lock);
-
-       if (!strcmp(image_type, "mono"))
-               ret_count = read_rbu_mono_data(buffer, pos, count);
-       else if (!strcmp(image_type, "packet"))
-               ret_count = read_packet_data(buffer, pos, count);
-       else
-               pr_debug("read_rbu_data: invalid image type specified\n");
-
-       spin_unlock(&rbu_data.lock);
-       return ret_count;
-}
-
-static void callbackfn_rbu(const struct firmware *fw, void *context)
-{
-       rbu_data.entry_created = 0;
-
-       if (!fw)
-               return;
-
-       if (!fw->size)
-               goto out;
-
-       spin_lock(&rbu_data.lock);
-       if (!strcmp(image_type, "mono")) {
-               if (!img_update_realloc(fw->size))
-                       memcpy(rbu_data.image_update_buffer,
-                               fw->data, fw->size);
-       } else if (!strcmp(image_type, "packet")) {
-               /*
-                * we need to free previous packets if a
-                * new hunk of packets needs to be downloaded
-                */
-               packet_empty_list();
-               if (packetize_data(fw->data, fw->size))
-                       /* Incase something goes wrong when we are
-                        * in middle of packetizing the data, we
-                        * need to free up whatever packets might
-                        * have been created before we quit.
-                        */
-                       packet_empty_list();
-       } else
-               pr_debug("invalid image type specified.\n");
-       spin_unlock(&rbu_data.lock);
- out:
-       release_firmware(fw);
-}
-
-static ssize_t read_rbu_image_type(struct file *filp, struct kobject *kobj,
-                                  struct bin_attribute *bin_attr,
-                                  char *buffer, loff_t pos, size_t count)
-{
-       int size = 0;
-       if (!pos)
-               size = scnprintf(buffer, count, "%s\n", image_type);
-       return size;
-}
-
-static ssize_t write_rbu_image_type(struct file *filp, struct kobject *kobj,
-                                   struct bin_attribute *bin_attr,
-                                   char *buffer, loff_t pos, size_t count)
-{
-       int rc = count;
-       int req_firm_rc = 0;
-       int i;
-       spin_lock(&rbu_data.lock);
-       /*
-        * Find the first newline or space
-        */
-       for (i = 0; i < count; ++i)
-               if (buffer[i] == '\n' || buffer[i] == ' ') {
-                       buffer[i] = '\0';
-                       break;
-               }
-       if (i == count)
-               buffer[count] = '\0';
-
-       if (strstr(buffer, "mono"))
-               strcpy(image_type, "mono");
-       else if (strstr(buffer, "packet"))
-               strcpy(image_type, "packet");
-       else if (strstr(buffer, "init")) {
-               /*
-                * If due to the user error the driver gets in a bad
-                * state where even though it is loaded , the
-                * /sys/class/firmware/dell_rbu entries are missing.
-                * to cover this situation the user can recreate entries
-                * by writing init to image_type.
-                */
-               if (!rbu_data.entry_created) {
-                       spin_unlock(&rbu_data.lock);
-                       req_firm_rc = request_firmware_nowait(THIS_MODULE,
-                               FW_ACTION_NOHOTPLUG, "dell_rbu",
-                               &rbu_device->dev, GFP_KERNEL, &context,
-                               callbackfn_rbu);
-                       if (req_firm_rc) {
-                               printk(KERN_ERR
-                                       "dell_rbu:%s request_firmware_nowait"
-                                       " failed %d\n", __func__, rc);
-                               rc = -EIO;
-                       } else
-                               rbu_data.entry_created = 1;
-
-                       spin_lock(&rbu_data.lock);
-               }
-       } else {
-               printk(KERN_WARNING "dell_rbu: image_type is invalid\n");
-               spin_unlock(&rbu_data.lock);
-               return -EINVAL;
-       }
-
-       /* we must free all previous allocations */
-       packet_empty_list();
-       img_update_free();
-       spin_unlock(&rbu_data.lock);
-
-       return rc;
-}
-
-static ssize_t read_rbu_packet_size(struct file *filp, struct kobject *kobj,
-                                   struct bin_attribute *bin_attr,
-                                   char *buffer, loff_t pos, size_t count)
-{
-       int size = 0;
-       if (!pos) {
-               spin_lock(&rbu_data.lock);
-               size = scnprintf(buffer, count, "%lu\n", rbu_data.packetsize);
-               spin_unlock(&rbu_data.lock);
-       }
-       return size;
-}
-
-static ssize_t write_rbu_packet_size(struct file *filp, struct kobject *kobj,
-                                    struct bin_attribute *bin_attr,
-                                    char *buffer, loff_t pos, size_t count)
-{
-       unsigned long temp;
-       spin_lock(&rbu_data.lock);
-       packet_empty_list();
-       sscanf(buffer, "%lu", &temp);
-       if (temp < 0xffffffff)
-               rbu_data.packetsize = temp;
-
-       spin_unlock(&rbu_data.lock);
-       return count;
-}
-
-static struct bin_attribute rbu_data_attr = {
-       .attr = {.name = "data", .mode = 0444},
-       .read = read_rbu_data,
-};
-
-static struct bin_attribute rbu_image_type_attr = {
-       .attr = {.name = "image_type", .mode = 0644},
-       .read = read_rbu_image_type,
-       .write = write_rbu_image_type,
-};
-
-static struct bin_attribute rbu_packet_size_attr = {
-       .attr = {.name = "packet_size", .mode = 0644},
-       .read = read_rbu_packet_size,
-       .write = write_rbu_packet_size,
-};
-
-static int __init dcdrbu_init(void)
-{
-       int rc;
-       spin_lock_init(&rbu_data.lock);
-
-       init_packet_head();
-       rbu_device = platform_device_register_simple("dell_rbu", -1, NULL, 0);
-       if (IS_ERR(rbu_device)) {
-               printk(KERN_ERR
-                       "dell_rbu:%s:platform_device_register_simple "
-                       "failed\n", __func__);
-               return PTR_ERR(rbu_device);
-       }
-
-       rc = sysfs_create_bin_file(&rbu_device->dev.kobj, &rbu_data_attr);
-       if (rc)
-               goto out_devreg;
-       rc = sysfs_create_bin_file(&rbu_device->dev.kobj, &rbu_image_type_attr);
-       if (rc)
-               goto out_data;
-       rc = sysfs_create_bin_file(&rbu_device->dev.kobj,
-               &rbu_packet_size_attr);
-       if (rc)
-               goto out_imtype;
-
-       rbu_data.entry_created = 0;
-       return 0;
-
-out_imtype:
-       sysfs_remove_bin_file(&rbu_device->dev.kobj, &rbu_image_type_attr);
-out_data:
-       sysfs_remove_bin_file(&rbu_device->dev.kobj, &rbu_data_attr);
-out_devreg:
-       platform_device_unregister(rbu_device);
-       return rc;
-}
-
-static __exit void dcdrbu_exit(void)
-{
-       spin_lock(&rbu_data.lock);
-       packet_empty_list();
-       img_update_free();
-       spin_unlock(&rbu_data.lock);
-       platform_device_unregister(rbu_device);
-}
-
-module_exit(dcdrbu_exit);
-module_init(dcdrbu_init);
-
-/* vim:noet:ts=8:sw=8
-*/
index 3e626fd9bd4e1fafe6e0f4da4597ed66a978bd08..8061667a6765aeb6752a50c134edf60414e7d652 100644 (file)
@@ -229,14 +229,6 @@ sanity_check(struct efi_variable *var, efi_char16_t *name, efi_guid_t vendor,
        return 0;
 }
 
-static inline bool is_compat(void)
-{
-       if (IS_ENABLED(CONFIG_COMPAT) && in_compat_syscall())
-               return true;
-
-       return false;
-}
-
 static void
 copy_out_compat(struct efi_variable *dst, struct compat_efi_variable *src)
 {
@@ -263,7 +255,7 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count)
        u8 *data;
        int err;
 
-       if (is_compat()) {
+       if (in_compat_syscall()) {
                struct compat_efi_variable *compat;
 
                if (count != sizeof(*compat))
@@ -324,7 +316,7 @@ efivar_show_raw(struct efivar_entry *entry, char *buf)
                             &entry->var.DataSize, entry->var.Data))
                return -EIO;
 
-       if (is_compat()) {
+       if (in_compat_syscall()) {
                compat = (struct compat_efi_variable *)buf;
 
                size = sizeof(*compat);
@@ -418,7 +410,7 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj,
        struct compat_efi_variable *compat = (struct compat_efi_variable *)buf;
        struct efi_variable *new_var = (struct efi_variable *)buf;
        struct efivar_entry *new_entry;
-       bool need_compat = is_compat();
+       bool need_compat = in_compat_syscall();
        efi_char16_t *name;
        unsigned long size;
        u32 attributes;
@@ -495,7 +487,7 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
 
-       if (is_compat()) {
+       if (in_compat_syscall()) {
                if (count != sizeof(*compat))
                        return -EINVAL;
 
index ae861342626e3516527167b18d96a7f5ac522169..d92f5b87c251e1248e20fd44708a4d80f759bc00 100644 (file)
@@ -638,7 +638,7 @@ static void sbefifo_collect_async_ffdc(struct sbefifo *sbefifo)
        }
         ffdc_iov.iov_base = ffdc;
        ffdc_iov.iov_len = SBEFIFO_MAX_FFDC_SIZE;
-        iov_iter_kvec(&ffdc_iter, WRITE | ITER_KVEC, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE);
+        iov_iter_kvec(&ffdc_iter, WRITE, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE);
        cmd[0] = cpu_to_be32(2);
        cmd[1] = cpu_to_be32(SBEFIFO_CMD_GET_SBE_FFDC);
        rc = sbefifo_do_command(sbefifo, cmd, 2, &ffdc_iter);
@@ -735,7 +735,7 @@ int sbefifo_submit(struct device *dev, const __be32 *command, size_t cmd_len,
        rbytes = (*resp_len) * sizeof(__be32);
        resp_iov.iov_base = response;
        resp_iov.iov_len = rbytes;
-        iov_iter_kvec(&resp_iter, WRITE | ITER_KVEC, &resp_iov, 1, rbytes);
+        iov_iter_kvec(&resp_iter, WRITE, &resp_iov, 1, rbytes);
 
        /* Perform the command */
        mutex_lock(&sbefifo->lock);
index d0102cfc8efbd1825df74c84ed26a0d0042a7e25..104b2e0d893bdad124d8f00405d3287b7bc24992 100644 (file)
@@ -151,6 +151,7 @@ extern int amdgpu_compute_multipipe;
 extern int amdgpu_gpu_recovery;
 extern int amdgpu_emu_mode;
 extern uint amdgpu_smu_memory_pool_size;
+extern uint amdgpu_dc_feature_mask;
 extern struct amdgpu_mgpu_info mgpu_info;
 
 #ifdef CONFIG_DRM_AMDGPU_SI
index 297a5490ad8c0be64157364419ca6e2f5b1eab1a..0a4fba196b843e4fe27b48b94f478478d1281749 100644 (file)
@@ -135,7 +135,8 @@ static int acp_poweroff(struct generic_pm_domain *genpd)
         * 2. power off the acp tiles
         * 3. check and enter ulv state
         */
-               if (adev->powerplay.pp_funcs->set_powergating_by_smu)
+               if (adev->powerplay.pp_funcs &&
+                       adev->powerplay.pp_funcs->set_powergating_by_smu)
                        amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
        }
        return 0;
@@ -517,7 +518,8 @@ static int acp_set_powergating_state(void *handle,
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        bool enable = state == AMD_PG_STATE_GATE ? true : false;
 
-       if (adev->powerplay.pp_funcs->set_powergating_by_smu)
+       if (adev->powerplay.pp_funcs &&
+               adev->powerplay.pp_funcs->set_powergating_by_smu)
                amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable);
 
        return 0;
index 1e4dd09a50726646cf117a480ddabfa5f2aecc2c..30bc345d6fdf0d5827c2aa737d284da92787ea70 100644 (file)
@@ -1493,8 +1493,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
        }
 
        adev->powerplay.pp_feature = amdgpu_pp_feature_mask;
-       if (amdgpu_sriov_vf(adev))
-               adev->powerplay.pp_feature &= ~PP_GFXOFF_MASK;
 
        for (i = 0; i < adev->num_ip_blocks; i++) {
                if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
@@ -1600,7 +1598,7 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
                }
        }
 
-       if (adev->powerplay.pp_funcs->load_firmware) {
+       if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) {
                r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle);
                if (r) {
                        pr_err("firmware loading failed\n");
@@ -3341,7 +3339,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
                kthread_park(ring->sched.thread);
 
-               if (job && job->base.sched == &ring->sched)
+               if (job && job->base.sched != &ring->sched)
                        continue;
 
                drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
index 28781414d71c85e4dc5657e7cbfb237a39f5a3c3..8de55f7f1a3a3922b4a1ac2d17cf12cdd35d1fd6 100644 (file)
@@ -114,8 +114,8 @@ uint amdgpu_pg_mask = 0xffffffff;
 uint amdgpu_sdma_phase_quantum = 32;
 char *amdgpu_disable_cu = NULL;
 char *amdgpu_virtual_display = NULL;
-/* OverDrive(bit 14) disabled by default*/
-uint amdgpu_pp_feature_mask = 0xffffbfff;
+/* OverDrive(bit 14),gfxoff(bit 15),stutter mode(bit 17) disabled by default*/
+uint amdgpu_pp_feature_mask = 0xfffd3fff;
 int amdgpu_ngg = 0;
 int amdgpu_prim_buf_per_se = 0;
 int amdgpu_pos_buf_per_se = 0;
@@ -127,6 +127,9 @@ int amdgpu_compute_multipipe = -1;
 int amdgpu_gpu_recovery = -1; /* auto */
 int amdgpu_emu_mode = 0;
 uint amdgpu_smu_memory_pool_size = 0;
+/* FBC (bit 0) disabled by default*/
+uint amdgpu_dc_feature_mask = 0;
+
 struct amdgpu_mgpu_info mgpu_info = {
        .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
 };
@@ -631,6 +634,14 @@ module_param(halt_if_hws_hang, int, 0644);
 MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
 #endif
 
+/**
+ * DOC: dcfeaturemask (uint)
+ * Override display features enabled. See enum DC_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
+ * The default is the current set of stable display features.
+ */
+MODULE_PARM_DESC(dcfeaturemask, "all stable DC features enabled (default))");
+module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef  CONFIG_DRM_AMDGPU_SI
        {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
index 790fd5408ddff2dbb6aa349c988b40ebfaf82536..1a656b8657f736fa0385aba0c54c6548d72af819 100644 (file)
@@ -392,7 +392,7 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
        if (!(adev->powerplay.pp_feature & PP_GFXOFF_MASK))
                return;
 
-       if (!adev->powerplay.pp_funcs->set_powergating_by_smu)
+       if (!adev->powerplay.pp_funcs || !adev->powerplay.pp_funcs->set_powergating_by_smu)
                return;
 
 
index 94055a485e01300e5106fa261b6ce51b636360ab..59cc678de8c1570642afc2d488f63fbc179a1e99 100644 (file)
@@ -704,7 +704,10 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
                return ret;
 
        if (adev->powerplay.pp_funcs->force_clock_level)
-               amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
+               ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
+
+       if (ret)
+               return -EINVAL;
 
        return count;
 }
@@ -737,7 +740,10 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
                return ret;
 
        if (adev->powerplay.pp_funcs->force_clock_level)
-               amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
+               ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
+
+       if (ret)
+               return -EINVAL;
 
        return count;
 }
@@ -770,7 +776,10 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
                return ret;
 
        if (adev->powerplay.pp_funcs->force_clock_level)
-               amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
+               ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
+
+       if (ret)
+               return -EINVAL;
 
        return count;
 }
index 6904d794d60a7a5c06057f74cf930db06a01e195..352b304090602e342ef6f584acab90edf126f51a 100644 (file)
@@ -542,7 +542,8 @@ static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
                                   struct amdgpu_vm_pt_cursor *cursor)
 {
        amdgpu_vm_pt_next(adev, cursor);
-       while (amdgpu_vm_pt_descendant(adev, cursor));
+       if (cursor->pfn != ~0ll)
+               while (amdgpu_vm_pt_descendant(adev, cursor));
 }
 
 /**
@@ -3234,8 +3235,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        }
        rbtree_postorder_for_each_entry_safe(mapping, tmp,
                                             &vm->va.rb_root, rb) {
+               /* Don't remove the mapping here, we don't want to trigger a
+                * rebalance and the tree is about to be destroyed anyway.
+                */
                list_del(&mapping->list);
-               amdgpu_vm_it_remove(mapping, &vm->va);
                kfree(mapping);
        }
        list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
index 3d0f277a6523f80a4e2ee7e66c94c494b94448bc..617b0c8908a375aa0d132af1868f3eaf9e2067b1 100644 (file)
@@ -4815,8 +4815,10 @@ static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
        if (r)
                goto done;
 
-       /* Test KCQs */
-       for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+       /* Test KCQs - reversing the order of rings seems to fix ring test failure
+        * after GPU reset
+        */
+       for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) {
                ring = &adev->gfx.compute_ring[i];
                ring->ready = true;
                r = amdgpu_ring_test_ring(ring);
index 14649f8475f3f68cfe9ebd2a816864cae1318e0f..fd23ba1226a57d9d3f1189db15f07daaab7e199d 100644 (file)
@@ -280,7 +280,7 @@ void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
                return;
 
        if (enable && adev->pg_flags & AMD_PG_SUPPORT_MMHUB) {
-               if (adev->powerplay.pp_funcs->set_powergating_by_smu)
+               if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_powergating_by_smu)
                        amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, true);
 
        }
index 04fa3d972636bb9878191ec9789f36a9684f6b04..7a8c9172d30a946fd91d147f8c73267a51b1fb08 100644 (file)
@@ -1366,7 +1366,8 @@ static int sdma_v4_0_hw_init(void *handle)
        int r;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs->set_powergating_by_smu)
+       if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs &&
+                       adev->powerplay.pp_funcs->set_powergating_by_smu)
                amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
 
        sdma_v4_0_init_golden_registers(adev);
@@ -1386,7 +1387,8 @@ static int sdma_v4_0_hw_fini(void *handle)
        sdma_v4_0_ctx_switch_enable(adev, false);
        sdma_v4_0_enable(adev, false);
 
-       if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs->set_powergating_by_smu)
+       if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs
+                       && adev->powerplay.pp_funcs->set_powergating_by_smu)
                amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);
 
        return 0;
index 2d4473557b0d23210782ff72397b47bd7f9c94ef..d13fc4fcb51790859f03aefb14f4bd90067c8fd8 100644 (file)
@@ -49,6 +49,7 @@ int vega20_reg_base_init(struct amdgpu_device *adev)
                adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
                adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
                adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+               adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
        }
        return 0;
 }
index e224f23e22155918a742bbd13e45131edc9463b0..c1262f62cd9f21400c68a6040000d31e4a9a9716 100644 (file)
@@ -429,6 +429,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
            adev->asic_type < CHIP_RAVEN)
                init_data.flags.gpu_vm_support = true;
 
+       if (amdgpu_dc_feature_mask & DC_FBC_MASK)
+               init_data.flags.fbc_support = true;
+
        /* Display Core create. */
        adev->dm.dc = dc_create(&init_data);
 
@@ -2700,18 +2703,11 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
        drm_connector = &aconnector->base;
 
        if (!aconnector->dc_sink) {
-               /*
-                * Create dc_sink when necessary to MST
-                * Don't apply fake_sink to MST
-                */
-               if (aconnector->mst_port) {
-                       dm_dp_mst_dc_sink_create(drm_connector);
-                       return stream;
+               if (!aconnector->mst_port) {
+                       sink = create_fake_sink(aconnector);
+                       if (!sink)
+                               return stream;
                }
-
-               sink = create_fake_sink(aconnector);
-               if (!sink)
-                       return stream;
        } else {
                sink = aconnector->dc_sink;
        }
@@ -3301,7 +3297,7 @@ void dm_drm_plane_destroy_state(struct drm_plane *plane,
 static const struct drm_plane_funcs dm_plane_funcs = {
        .update_plane   = drm_atomic_helper_update_plane,
        .disable_plane  = drm_atomic_helper_disable_plane,
-       .destroy        = drm_plane_cleanup,
+       .destroy        = drm_primary_helper_destroy,
        .reset = dm_drm_plane_reset,
        .atomic_duplicate_state = dm_drm_plane_duplicate_state,
        .atomic_destroy_state = dm_drm_plane_destroy_state,
index 978b34a5011ce508055064658b556ef5082097e7..924a38a1fc446019a0aac035b018b952e3be9923 100644 (file)
@@ -160,8 +160,6 @@ struct amdgpu_dm_connector {
        struct mutex hpd_lock;
 
        bool fake_enable;
-
-       bool mst_connected;
 };
 
 #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base)
index 03601d717fed90708463fca143a6de50d6b750b1..d02c32a1039c02b97f02d721030ae82e546799df 100644 (file)
@@ -205,40 +205,6 @@ static const struct drm_connector_funcs dm_dp_mst_connector_funcs = {
        .atomic_get_property = amdgpu_dm_connector_atomic_get_property
 };
 
-void dm_dp_mst_dc_sink_create(struct drm_connector *connector)
-{
-       struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
-       struct dc_sink *dc_sink;
-       struct dc_sink_init_data init_params = {
-                       .link = aconnector->dc_link,
-                       .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST };
-
-       /* FIXME none of this is safe. we shouldn't touch aconnector here in
-        * atomic_check
-        */
-
-       /*
-        * TODO: Need to further figure out why ddc.algo is NULL while MST port exists
-        */
-       if (!aconnector->port || !aconnector->port->aux.ddc.algo)
-               return;
-
-       ASSERT(aconnector->edid);
-
-       dc_sink = dc_link_add_remote_sink(
-               aconnector->dc_link,
-               (uint8_t *)aconnector->edid,
-               (aconnector->edid->extensions + 1) * EDID_LENGTH,
-               &init_params);
-
-       dc_sink->priv = aconnector;
-       aconnector->dc_sink = dc_sink;
-
-       if (aconnector->dc_sink)
-               amdgpu_dm_update_freesync_caps(
-                               connector, aconnector->edid);
-}
-
 static int dm_dp_mst_get_modes(struct drm_connector *connector)
 {
        struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
@@ -319,12 +285,7 @@ dm_dp_create_fake_mst_encoder(struct amdgpu_dm_connector *connector)
        struct amdgpu_device *adev = dev->dev_private;
        struct amdgpu_encoder *amdgpu_encoder;
        struct drm_encoder *encoder;
-       const struct drm_connector_helper_funcs *connector_funcs =
-               connector->base.helper_private;
-       struct drm_encoder *enc_master =
-               connector_funcs->best_encoder(&connector->base);
 
-       DRM_DEBUG_KMS("enc master is %p\n", enc_master);
        amdgpu_encoder = kzalloc(sizeof(*amdgpu_encoder), GFP_KERNEL);
        if (!amdgpu_encoder)
                return NULL;
@@ -354,25 +315,6 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
        struct amdgpu_device *adev = dev->dev_private;
        struct amdgpu_dm_connector *aconnector;
        struct drm_connector *connector;
-       struct drm_connector_list_iter conn_iter;
-
-       drm_connector_list_iter_begin(dev, &conn_iter);
-       drm_for_each_connector_iter(connector, &conn_iter) {
-               aconnector = to_amdgpu_dm_connector(connector);
-               if (aconnector->mst_port == master
-                               && !aconnector->port) {
-                       DRM_INFO("DM_MST: reusing connector: %p [id: %d] [master: %p]\n",
-                                               aconnector, connector->base.id, aconnector->mst_port);
-
-                       aconnector->port = port;
-                       drm_connector_set_path_property(connector, pathprop);
-
-                       drm_connector_list_iter_end(&conn_iter);
-                       aconnector->mst_connected = true;
-                       return &aconnector->base;
-               }
-       }
-       drm_connector_list_iter_end(&conn_iter);
 
        aconnector = kzalloc(sizeof(*aconnector), GFP_KERNEL);
        if (!aconnector)
@@ -421,8 +363,6 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
         */
        amdgpu_dm_connector_funcs_reset(connector);
 
-       aconnector->mst_connected = true;
-
        DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n",
                        aconnector, connector->base.id, aconnector->mst_port);
 
@@ -434,6 +374,9 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
 static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
                                        struct drm_connector *connector)
 {
+       struct amdgpu_dm_connector *master = container_of(mgr, struct amdgpu_dm_connector, mst_mgr);
+       struct drm_device *dev = master->base.dev;
+       struct amdgpu_device *adev = dev->dev_private;
        struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
 
        DRM_INFO("DM_MST: Disabling connector: %p [id: %d] [master: %p]\n",
@@ -447,7 +390,10 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
                aconnector->dc_sink = NULL;
        }
 
-       aconnector->mst_connected = false;
+       drm_connector_unregister(connector);
+       if (adev->mode_info.rfbdev)
+               drm_fb_helper_remove_one_connector(&adev->mode_info.rfbdev->helper, connector);
+       drm_connector_put(connector);
 }
 
 static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
@@ -458,18 +404,10 @@ static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
        drm_kms_helper_hotplug_event(dev);
 }
 
-static void dm_dp_mst_link_status_reset(struct drm_connector *connector)
-{
-       mutex_lock(&connector->dev->mode_config.mutex);
-       drm_connector_set_link_status_property(connector, DRM_MODE_LINK_STATUS_BAD);
-       mutex_unlock(&connector->dev->mode_config.mutex);
-}
-
 static void dm_dp_mst_register_connector(struct drm_connector *connector)
 {
        struct drm_device *dev = connector->dev;
        struct amdgpu_device *adev = dev->dev_private;
-       struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
 
        if (adev->mode_info.rfbdev)
                drm_fb_helper_add_one_connector(&adev->mode_info.rfbdev->helper, connector);
@@ -477,9 +415,6 @@ static void dm_dp_mst_register_connector(struct drm_connector *connector)
                DRM_ERROR("adev->mode_info.rfbdev is NULL\n");
 
        drm_connector_register(connector);
-
-       if (aconnector->mst_connected)
-               dm_dp_mst_link_status_reset(connector);
 }
 
 static const struct drm_dp_mst_topology_cbs dm_mst_cbs = {
index 8cf51da26657e29e72062b34aeed7e5d827f9e21..2da851b40042aee9b79eb2c666d45c0f5061fee0 100644 (file)
@@ -31,6 +31,5 @@ struct amdgpu_dm_connector;
 
 void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
                                       struct amdgpu_dm_connector *aconnector);
-void dm_dp_mst_dc_sink_create(struct drm_connector *connector);
 
 #endif
index 0fab64a2a9150f723422f8e3600174866b03cc1b..12001a006b2d8e1d0b5f3734c189e9faf23d94e5 100644 (file)
@@ -101,7 +101,7 @@ bool dm_pp_apply_display_requirements(
                        adev->pm.pm_display_cfg.displays[i].controller_id = dc_cfg->pipe_idx + 1;
                }
 
-               if (adev->powerplay.pp_funcs->display_configuration_change)
+               if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->display_configuration_change)
                        adev->powerplay.pp_funcs->display_configuration_change(
                                adev->powerplay.pp_handle,
                                &adev->pm.pm_display_cfg);
@@ -304,7 +304,7 @@ bool dm_pp_get_clock_levels_by_type(
        struct amd_pp_simple_clock_info validation_clks = { 0 };
        uint32_t i;
 
-       if (adev->powerplay.pp_funcs->get_clock_by_type) {
+       if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_clock_by_type) {
                if (adev->powerplay.pp_funcs->get_clock_by_type(pp_handle,
                        dc_to_pp_clock_type(clk_type), &pp_clks)) {
                /* Error in pplib. Provide default values. */
@@ -315,7 +315,7 @@ bool dm_pp_get_clock_levels_by_type(
 
        pp_to_dc_clock_levels(&pp_clks, dc_clks, clk_type);
 
-       if (adev->powerplay.pp_funcs->get_display_mode_validation_clocks) {
+       if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_display_mode_validation_clocks) {
                if (adev->powerplay.pp_funcs->get_display_mode_validation_clocks(
                                                pp_handle, &validation_clks)) {
                        /* Error in pplib. Provide default values. */
@@ -398,6 +398,9 @@ bool dm_pp_get_clock_levels_by_type_with_voltage(
        struct pp_clock_levels_with_voltage pp_clk_info = {0};
        const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
 
+       if (!pp_funcs || !pp_funcs->get_clock_by_type_with_voltage)
+               return false;
+
        if (pp_funcs->get_clock_by_type_with_voltage(pp_handle,
                                                     dc_to_pp_clock_type(clk_type),
                                                     &pp_clk_info))
@@ -438,7 +441,7 @@ bool dm_pp_apply_clock_for_voltage_request(
        if (!pp_clock_request.clock_type)
                return false;
 
-       if (adev->powerplay.pp_funcs->display_clock_voltage_request)
+       if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->display_clock_voltage_request)
                ret = adev->powerplay.pp_funcs->display_clock_voltage_request(
                        adev->powerplay.pp_handle,
                        &pp_clock_request);
@@ -455,7 +458,7 @@ bool dm_pp_get_static_clocks(
        struct amd_pp_clock_info pp_clk_info = {0};
        int ret = 0;
 
-       if (adev->powerplay.pp_funcs->get_current_clocks)
+       if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_current_clocks)
                ret = adev->powerplay.pp_funcs->get_current_clocks(
                        adev->powerplay.pp_handle,
                        &pp_clk_info);
@@ -505,6 +508,9 @@ void pp_rv_set_wm_ranges(struct pp_smu *pp,
        wm_with_clock_ranges.num_wm_dmif_sets = ranges->num_reader_wm_sets;
        wm_with_clock_ranges.num_wm_mcif_sets = ranges->num_writer_wm_sets;
 
+       if (!pp_funcs || !pp_funcs->set_watermarks_for_clocks_ranges)
+               return;
+
        for (i = 0; i < wm_with_clock_ranges.num_wm_dmif_sets; i++) {
                if (ranges->reader_wm_sets[i].wm_inst > 3)
                        wm_dce_clocks[i].wm_set_id = WM_SET_A;
index fb04a4ad141fdb68f68a747f6c4474a15e7da8a2..5da2186b3615ff97b3bc54a68b3849ad6c85856f 100644 (file)
@@ -1722,7 +1722,7 @@ static void write_i2c_retimer_setting(
                i2c_success = i2c_write(pipe_ctx, slave_address,
                                buffer, sizeof(buffer));
                RETIMER_REDRIVER_INFO("retimer write to slave_address = 0x%x,\
-                       offset = 0x%d, reg_val = 0x%d, i2c_success = %d\n",
+                       offset = 0x%x, reg_val = 0x%x, i2c_success = %d\n",
                        slave_address, buffer[0], buffer[1], i2c_success?1:0);
                if (!i2c_success)
                        /* Write failure */
@@ -1734,7 +1734,7 @@ static void write_i2c_retimer_setting(
                i2c_success = i2c_write(pipe_ctx, slave_address,
                                buffer, sizeof(buffer));
                RETIMER_REDRIVER_INFO("retimer write to slave_address = 0x%x,\
-                       offset = 0x%d, reg_val = 0x%d, i2c_success = %d\n",
+                       offset = 0x%x, reg_val = 0x%x, i2c_success = %d\n",
                        slave_address, buffer[0], buffer[1], i2c_success?1:0);
                if (!i2c_success)
                        /* Write failure */
index 199527171100b0ed7cbd34aae7468e0989efc9a6..b57fa61b3034a14869a2cee91423b7f7e0fa11e0 100644 (file)
@@ -169,6 +169,7 @@ struct link_training_settings;
 struct dc_config {
        bool gpu_vm_support;
        bool disable_disp_pll_sharing;
+       bool fbc_support;
 };
 
 enum visual_confirm {
index b75ede5f84f76837960463387a90ca35aa7ac62a..b459867a05b202e84a1a59c5ea656fc4b4395b2a 100644 (file)
@@ -1736,7 +1736,12 @@ static void set_static_screen_control(struct pipe_ctx **pipe_ctx,
        if (events->force_trigger)
                value |= 0x1;
 
-       value |= 0x84;
+       if (num_pipes) {
+               struct dc *dc = pipe_ctx[0]->stream->ctx->dc;
+
+               if (dc->fbc_compressor)
+                       value |= 0x84;
+       }
 
        for (i = 0; i < num_pipes; i++)
                pipe_ctx[i]->stream_res.tg->funcs->
index de190935f0a456000cbabbdcc723b7c1a43667b7..7c9fd9052ee233f2c91d5a2c5cd4c91765603335 100644 (file)
@@ -568,7 +568,7 @@ static struct input_pixel_processor *dce110_ipp_create(
 
 static const struct encoder_feature_support link_enc_feature = {
                .max_hdmi_deep_color = COLOR_DEPTH_121212,
-               .max_hdmi_pixel_clock = 594000,
+               .max_hdmi_pixel_clock = 300000,
                .flags.bits.IS_HBR2_CAPABLE = true,
                .flags.bits.IS_TPS3_CAPABLE = true
 };
@@ -1362,7 +1362,8 @@ static bool construct(
                pool->base.sw_i2cs[i] = NULL;
        }
 
-       dc->fbc_compressor = dce110_compressor_create(ctx);
+       if (dc->config.fbc_support)
+               dc->fbc_compressor = dce110_compressor_create(ctx);
 
        if (!underlay_create(ctx, &pool->base))
                goto res_create_fail;
index a407892905af29661a70ad75a6c76c5d502163c3..c0d9f332baedc10d71e701b5f19e254d8b7af40d 100644 (file)
@@ -40,8 +40,6 @@
 #define LITTLEENDIAN_CPU
 #endif
 
-#undef READ
-#undef WRITE
 #undef FRAME_SIZE
 
 #define dm_output_to_console(fmt, ...) DRM_DEBUG_KMS(fmt, ##__VA_ARGS__)
index 2083c308007cde72412f7ad7291dc85fe882e532..470d7b89071a40163dc039af84a4bc345852cfcf 100644 (file)
@@ -133,6 +133,10 @@ enum PP_FEATURE_MASK {
        PP_AVFS_MASK = 0x40000,
 };
 
+enum DC_FEATURE_MASK {
+       DC_FBC_MASK = 0x1,
+};
+
 /**
  * struct amd_ip_funcs - general hooks for managing amdgpu IP Blocks
  */
index d2e7c0fa96c2f7263f367cf67597993aa9ebcbf5..8eb0bb241210bdffe3ff4f3e280bed4856a1c810 100644 (file)
@@ -1325,7 +1325,7 @@ struct atom_smu_info_v3_3 {
   struct   atom_common_table_header  table_header;
   uint8_t  smuip_min_ver;
   uint8_t  smuip_max_ver;
-  uint8_t  smu_rsd1;
+  uint8_t  waflclk_ss_mode;
   uint8_t  gpuclk_ss_mode;
   uint16_t sclk_ss_percentage;
   uint16_t sclk_ss_rate_10hz;
@@ -1355,7 +1355,10 @@ struct atom_smu_info_v3_3 {
   uint32_t syspll3_1_vco_freq_10khz;
   uint32_t bootup_fclk_10khz;
   uint32_t bootup_waflclk_10khz;
-  uint32_t reserved[3];
+  uint32_t smu_info_caps;
+  uint16_t waflclk_ss_percentage;    // in unit of 0.001%
+  uint16_t smuinitoffset;
+  uint32_t reserved;
 };
 
 /*
index e8964cae6b93dba0c3d183dcc5383fa12734fca3..d6aa1d414320bf1d63bb84ffb490ac8e8b6417e6 100644 (file)
@@ -723,11 +723,14 @@ static int pp_dpm_force_clock_level(void *handle,
                pr_info("%s was not implemented.\n", __func__);
                return 0;
        }
+
+       if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) {
+               pr_info("force clock level is for dpm manual mode only.\n");
+               return -EINVAL;
+       }
+
        mutex_lock(&hwmgr->smu_lock);
-       if (hwmgr->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL)
-               ret = hwmgr->hwmgr_func->force_clock_level(hwmgr, type, mask);
-       else
-               ret = -EINVAL;
+       ret = hwmgr->hwmgr_func->force_clock_level(hwmgr, type, mask);
        mutex_unlock(&hwmgr->smu_lock);
        return ret;
 }
@@ -963,6 +966,7 @@ static int pp_dpm_switch_power_profile(void *handle,
 static int pp_set_power_limit(void *handle, uint32_t limit)
 {
        struct pp_hwmgr *hwmgr = handle;
+       uint32_t max_power_limit;
 
        if (!hwmgr || !hwmgr->pm_en)
                return -EINVAL;
@@ -975,7 +979,13 @@ static int pp_set_power_limit(void *handle, uint32_t limit)
        if (limit == 0)
                limit = hwmgr->default_power_limit;
 
-       if (limit > hwmgr->default_power_limit)
+       max_power_limit = hwmgr->default_power_limit;
+       if (hwmgr->od_enabled) {
+               max_power_limit *= (100 + hwmgr->platform_descriptor.TDPODLimit);
+               max_power_limit /= 100;
+       }
+
+       if (limit > max_power_limit)
                return -EINVAL;
 
        mutex_lock(&hwmgr->smu_lock);
@@ -994,8 +1004,13 @@ static int pp_get_power_limit(void *handle, uint32_t *limit, bool default_limit)
 
        mutex_lock(&hwmgr->smu_lock);
 
-       if (default_limit)
+       if (default_limit) {
                *limit = hwmgr->default_power_limit;
+               if (hwmgr->od_enabled) {
+                       *limit *= (100 + hwmgr->platform_descriptor.TDPODLimit);
+                       *limit /= 100;
+               }
+       }
        else
                *limit = hwmgr->power_limit;
 
@@ -1303,12 +1318,12 @@ static int pp_enable_mgpu_fan_boost(void *handle)
 {
        struct pp_hwmgr *hwmgr = handle;
 
-       if (!hwmgr || !hwmgr->pm_en)
+       if (!hwmgr)
                return -EINVAL;
 
-       if (hwmgr->hwmgr_func->enable_mgpu_fan_boost == NULL) {
+       if (!hwmgr->pm_en ||
+            hwmgr->hwmgr_func->enable_mgpu_fan_boost == NULL)
                return 0;
-       }
 
        mutex_lock(&hwmgr->smu_lock);
        hwmgr->hwmgr_func->enable_mgpu_fan_boost(hwmgr);
index 6c99cbf51c08fd035fa3da585c06b8b7074bdb29..ed35ec0341e671ab8f5cee4d89d2417283147bf3 100644 (file)
@@ -3588,9 +3588,10 @@ static int smu7_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, cons
                        break;
        }
 
-       if (i >= sclk_table->count)
+       if (i >= sclk_table->count) {
                data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
-       else {
+               sclk_table->dpm_levels[i-1].value = sclk;
+       } else {
        /* TODO: Check SCLK in DAL's minimum clocks
         * in case DeepSleep divider update is required.
         */
@@ -3605,9 +3606,10 @@ static int smu7_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, cons
                        break;
        }
 
-       if (i >= mclk_table->count)
+       if (i >= mclk_table->count) {
                data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
-
+               mclk_table->dpm_levels[i-1].value = mclk;
+       }
 
        if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
                data->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_MCLK;
index 4714b5b598255b1cad2790b76011f02e98afd704..99a33c33a32c9e47fb8bbaf455d8f44efe69c18f 100644 (file)
@@ -718,7 +718,7 @@ int smu_set_watermarks_for_clocks_ranges(void *wt_table,
                table->WatermarkRow[1][i].MaxClock =
                        cpu_to_le16((uint16_t)
                        (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_max_dcfclk_clk_in_khz) /
-                       100);
+                       1000);
                table->WatermarkRow[1][i].MinUclk =
                        cpu_to_le16((uint16_t)
                        (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_min_mem_clk_in_khz) /
index 419a1d77d661e3708fded47180ce2b532592a087..8c4db86bb4b770b345575564ad335d48e2681835 100644 (file)
@@ -1333,7 +1333,6 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
        if (hwmgr->platform_descriptor.overdriveLimit.memoryClock == 0)
                hwmgr->platform_descriptor.overdriveLimit.memoryClock =
                                        dpm_table->dpm_levels[dpm_table->count-1].value;
-
        vega10_init_dpm_state(&(dpm_table->dpm_state));
 
        data->dpm_table.eclk_table.count = 0;
@@ -3249,6 +3248,37 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 static int vega10_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, const void *input)
 {
        struct vega10_hwmgr *data = hwmgr->backend;
+       const struct phm_set_power_state_input *states =
+                       (const struct phm_set_power_state_input *)input;
+       const struct vega10_power_state *vega10_ps =
+                       cast_const_phw_vega10_power_state(states->pnew_state);
+       struct vega10_single_dpm_table *sclk_table = &(data->dpm_table.gfx_table);
+       uint32_t sclk = vega10_ps->performance_levels
+                       [vega10_ps->performance_level_count - 1].gfx_clock;
+       struct vega10_single_dpm_table *mclk_table = &(data->dpm_table.mem_table);
+       uint32_t mclk = vega10_ps->performance_levels
+                       [vega10_ps->performance_level_count - 1].mem_clock;
+       uint32_t i;
+
+       for (i = 0; i < sclk_table->count; i++) {
+               if (sclk == sclk_table->dpm_levels[i].value)
+                       break;
+       }
+
+       if (i >= sclk_table->count) {
+               data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
+               sclk_table->dpm_levels[i-1].value = sclk;
+       }
+
+       for (i = 0; i < mclk_table->count; i++) {
+               if (mclk == mclk_table->dpm_levels[i].value)
+                       break;
+       }
+
+       if (i >= mclk_table->count) {
+               data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
+               mclk_table->dpm_levels[i-1].value = mclk;
+       }
 
        if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
                data->need_update_dpm_table |= DPMTABLE_UPDATE_MCLK;
@@ -4529,11 +4559,13 @@ static int vega10_set_sclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
 
        if (vega10_ps->performance_levels
                        [vega10_ps->performance_level_count - 1].gfx_clock >
-                       hwmgr->platform_descriptor.overdriveLimit.engineClock)
+                       hwmgr->platform_descriptor.overdriveLimit.engineClock) {
                vega10_ps->performance_levels
                [vega10_ps->performance_level_count - 1].gfx_clock =
                                hwmgr->platform_descriptor.overdriveLimit.engineClock;
-
+               pr_warn("max sclk supported by vbios is %d\n",
+                               hwmgr->platform_descriptor.overdriveLimit.engineClock);
+       }
        return 0;
 }
 
@@ -4581,10 +4613,13 @@ static int vega10_set_mclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
 
        if (vega10_ps->performance_levels
                        [vega10_ps->performance_level_count - 1].mem_clock >
-                       hwmgr->platform_descriptor.overdriveLimit.memoryClock)
+                       hwmgr->platform_descriptor.overdriveLimit.memoryClock) {
                vega10_ps->performance_levels
                [vega10_ps->performance_level_count - 1].mem_clock =
                                hwmgr->platform_descriptor.overdriveLimit.memoryClock;
+               pr_warn("max mclk supported by vbios is %d\n",
+                               hwmgr->platform_descriptor.overdriveLimit.memoryClock);
+       }
 
        return 0;
 }
index 9600e2f226e98e2be4d647d839bd98aef883de30..74bc37308dc09cb8303cd42146c7d88af3a27c58 100644 (file)
@@ -2356,6 +2356,13 @@ static int vega12_gfx_off_control(struct pp_hwmgr *hwmgr, bool enable)
                return vega12_disable_gfx_off(hwmgr);
 }
 
+static int vega12_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state,
+                               PHM_PerformanceLevelDesignation designation, uint32_t index,
+                               PHM_PerformanceLevel *level)
+{
+       return 0;
+}
+
 static const struct pp_hwmgr_func vega12_hwmgr_funcs = {
        .backend_init = vega12_hwmgr_backend_init,
        .backend_fini = vega12_hwmgr_backend_fini,
@@ -2406,6 +2413,7 @@ static const struct pp_hwmgr_func vega12_hwmgr_funcs = {
        .register_irq_handlers = smu9_register_irq_handlers,
        .start_thermal_controller = vega12_start_thermal_controller,
        .powergate_gfx = vega12_gfx_off_control,
+       .get_performance_level = vega12_get_performance_level,
 };
 
 int vega12_hwmgr_init(struct pp_hwmgr *hwmgr)
index b4dbbb7c334ce04c9760f0114825cbdc0ba5ee88..99861f32b1f95aedfb5c64d39814baca329662c2 100644 (file)
@@ -120,6 +120,7 @@ static void vega20_set_default_registry_data(struct pp_hwmgr *hwmgr)
        data->registry_data.disable_auto_wattman = 1;
        data->registry_data.auto_wattman_debug = 0;
        data->registry_data.auto_wattman_sample_period = 100;
+       data->registry_data.fclk_gfxclk_ratio = 0x3F6CCCCD;
        data->registry_data.auto_wattman_threshold = 50;
        data->registry_data.gfxoff_controlled_by_driver = 1;
        data->gfxoff_allowed = false;
@@ -829,6 +830,28 @@ static int vega20_enable_all_smu_features(struct pp_hwmgr *hwmgr)
        return 0;
 }
 
+static int vega20_notify_smc_display_change(struct pp_hwmgr *hwmgr)
+{
+       struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend);
+
+       if (data->smu_features[GNLD_DPM_UCLK].enabled)
+               return smum_send_msg_to_smc_with_parameter(hwmgr,
+                       PPSMC_MSG_SetUclkFastSwitch,
+                       1);
+
+       return 0;
+}
+
+static int vega20_send_clock_ratio(struct pp_hwmgr *hwmgr)
+{
+       struct vega20_hwmgr *data =
+                       (struct vega20_hwmgr *)(hwmgr->backend);
+
+       return smum_send_msg_to_smc_with_parameter(hwmgr,
+                       PPSMC_MSG_SetFclkGfxClkRatio,
+                       data->registry_data.fclk_gfxclk_ratio);
+}
+
 static int vega20_disable_all_smu_features(struct pp_hwmgr *hwmgr)
 {
        struct vega20_hwmgr *data =
@@ -1532,6 +1555,16 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
                        "[EnableDPMTasks] Failed to enable all smu features!",
                        return result);
 
+       result = vega20_notify_smc_display_change(hwmgr);
+       PP_ASSERT_WITH_CODE(!result,
+                       "[EnableDPMTasks] Failed to notify smc display change!",
+                       return result);
+
+       result = vega20_send_clock_ratio(hwmgr);
+       PP_ASSERT_WITH_CODE(!result,
+                       "[EnableDPMTasks] Failed to send clock ratio!",
+                       return result);
+
        /* Initialize UVD/VCE powergating state */
        vega20_init_powergate_state(hwmgr);
 
@@ -1875,38 +1908,20 @@ static int vega20_get_gpu_power(struct pp_hwmgr *hwmgr,
        return ret;
 }
 
-static int vega20_get_current_gfx_clk_freq(struct pp_hwmgr *hwmgr, uint32_t *gfx_freq)
-{
-       uint32_t gfx_clk = 0;
-       int ret = 0;
-
-       *gfx_freq = 0;
-
-       PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
-                       PPSMC_MSG_GetDpmClockFreq, (PPCLK_GFXCLK << 16))) == 0,
-                       "[GetCurrentGfxClkFreq] Attempt to get Current GFXCLK Frequency Failed!",
-                       return ret);
-       gfx_clk = smum_get_argument(hwmgr);
-
-       *gfx_freq = gfx_clk * 100;
-
-       return 0;
-}
-
-static int vega20_get_current_mclk_freq(struct pp_hwmgr *hwmgr, uint32_t *mclk_freq)
+static int vega20_get_current_clk_freq(struct pp_hwmgr *hwmgr,
+               PPCLK_e clk_id, uint32_t *clk_freq)
 {
-       uint32_t mem_clk = 0;
        int ret = 0;
 
-       *mclk_freq = 0;
+       *clk_freq = 0;
 
        PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
-                       PPSMC_MSG_GetDpmClockFreq, (PPCLK_UCLK << 16))) == 0,
-                       "[GetCurrentMClkFreq] Attempt to get Current MCLK Frequency Failed!",
+                       PPSMC_MSG_GetDpmClockFreq, (clk_id << 16))) == 0,
+                       "[GetCurrentClkFreq] Attempt to get Current Frequency Failed!",
                        return ret);
-       mem_clk = smum_get_argument(hwmgr);
+       *clk_freq = smum_get_argument(hwmgr);
 
-       *mclk_freq = mem_clk * 100;
+       *clk_freq = *clk_freq * 100;
 
        return 0;
 }
@@ -1937,12 +1952,16 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 
        switch (idx) {
        case AMDGPU_PP_SENSOR_GFX_SCLK:
-               ret = vega20_get_current_gfx_clk_freq(hwmgr, (uint32_t *)value);
+               ret = vega20_get_current_clk_freq(hwmgr,
+                               PPCLK_GFXCLK,
+                               (uint32_t *)value);
                if (!ret)
                        *size = 4;
                break;
        case AMDGPU_PP_SENSOR_GFX_MCLK:
-               ret = vega20_get_current_mclk_freq(hwmgr, (uint32_t *)value);
+               ret = vega20_get_current_clk_freq(hwmgr,
+                               PPCLK_UCLK,
+                               (uint32_t *)value);
                if (!ret)
                        *size = 4;
                break;
@@ -1986,19 +2005,6 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx,
        return ret;
 }
 
-static int vega20_notify_smc_display_change(struct pp_hwmgr *hwmgr,
-               bool has_disp)
-{
-       struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend);
-
-       if (data->smu_features[GNLD_DPM_UCLK].enabled)
-               return smum_send_msg_to_smc_with_parameter(hwmgr,
-                       PPSMC_MSG_SetUclkFastSwitch,
-                       has_disp ? 1 : 0);
-
-       return 0;
-}
-
 int vega20_display_clock_voltage_request(struct pp_hwmgr *hwmgr,
                struct pp_display_clock_request *clock_req)
 {
@@ -2012,7 +2018,6 @@ int vega20_display_clock_voltage_request(struct pp_hwmgr *hwmgr,
        if (data->smu_features[GNLD_DPM_DCEFCLK].enabled) {
                switch (clk_type) {
                case amd_pp_dcef_clock:
-                       clk_freq = clock_req->clock_freq_in_khz / 100;
                        clk_select = PPCLK_DCEFCLK;
                        break;
                case amd_pp_disp_clock:
@@ -2041,29 +2046,31 @@ int vega20_display_clock_voltage_request(struct pp_hwmgr *hwmgr,
        return result;
 }
 
+static int vega20_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state,
+                               PHM_PerformanceLevelDesignation designation, uint32_t index,
+                               PHM_PerformanceLevel *level)
+{
+       return 0;
+}
+
 static int vega20_notify_smc_display_config_after_ps_adjustment(
                struct pp_hwmgr *hwmgr)
 {
        struct vega20_hwmgr *data =
                        (struct vega20_hwmgr *)(hwmgr->backend);
+       struct vega20_single_dpm_table *dpm_table =
+                       &data->dpm_table.mem_table;
        struct PP_Clocks min_clocks = {0};
        struct pp_display_clock_request clock_req;
        int ret = 0;
 
-       if ((hwmgr->display_config->num_display > 1) &&
-            !hwmgr->display_config->multi_monitor_in_sync &&
-            !hwmgr->display_config->nb_pstate_switch_disable)
-               vega20_notify_smc_display_change(hwmgr, false);
-       else
-               vega20_notify_smc_display_change(hwmgr, true);
-
        min_clocks.dcefClock = hwmgr->display_config->min_dcef_set_clk;
        min_clocks.dcefClockInSR = hwmgr->display_config->min_dcef_deep_sleep_set_clk;
        min_clocks.memoryClock = hwmgr->display_config->min_mem_set_clock;
 
        if (data->smu_features[GNLD_DPM_DCEFCLK].supported) {
                clock_req.clock_type = amd_pp_dcef_clock;
-               clock_req.clock_freq_in_khz = min_clocks.dcefClock;
+               clock_req.clock_freq_in_khz = min_clocks.dcefClock * 10;
                if (!vega20_display_clock_voltage_request(hwmgr, &clock_req)) {
                        if (data->smu_features[GNLD_DS_DCEFCLK].supported)
                                PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(
@@ -2076,6 +2083,15 @@ static int vega20_notify_smc_display_config_after_ps_adjustment(
                }
        }
 
+       if (data->smu_features[GNLD_DPM_UCLK].enabled) {
+               dpm_table->dpm_state.hard_min_level = min_clocks.memoryClock / 100;
+               PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+                               PPSMC_MSG_SetHardMinByFreq,
+                               (PPCLK_UCLK << 16 ) | dpm_table->dpm_state.hard_min_level)),
+                               "[SetHardMinFreq] Set hard min uclk failed!",
+                               return ret);
+       }
+
        return 0;
 }
 
@@ -2353,7 +2369,7 @@ static int vega20_get_sclks(struct pp_hwmgr *hwmgr,
 
        for (i = 0; i < count; i++) {
                clocks->data[i].clocks_in_khz =
-                       dpm_table->dpm_levels[i].value * 100;
+                       dpm_table->dpm_levels[i].value * 1000;
                clocks->data[i].latency_in_us = 0;
        }
 
@@ -2383,7 +2399,7 @@ static int vega20_get_memclocks(struct pp_hwmgr *hwmgr,
        for (i = 0; i < count; i++) {
                clocks->data[i].clocks_in_khz =
                        data->mclk_latency_table.entries[i].frequency =
-                       dpm_table->dpm_levels[i].value * 100;
+                       dpm_table->dpm_levels[i].value * 1000;
                clocks->data[i].latency_in_us =
                        data->mclk_latency_table.entries[i].latency =
                        vega20_get_mem_latency(hwmgr, dpm_table->dpm_levels[i].value);
@@ -2408,7 +2424,7 @@ static int vega20_get_dcefclocks(struct pp_hwmgr *hwmgr,
 
        for (i = 0; i < count; i++) {
                clocks->data[i].clocks_in_khz =
-                       dpm_table->dpm_levels[i].value * 100;
+                       dpm_table->dpm_levels[i].value * 1000;
                clocks->data[i].latency_in_us = 0;
        }
 
@@ -2431,7 +2447,7 @@ static int vega20_get_socclocks(struct pp_hwmgr *hwmgr,
 
        for (i = 0; i < count; i++) {
                clocks->data[i].clocks_in_khz =
-                       dpm_table->dpm_levels[i].value * 100;
+                       dpm_table->dpm_levels[i].value * 1000;
                clocks->data[i].latency_in_us = 0;
        }
 
@@ -2582,11 +2598,11 @@ static int vega20_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,
                                return -EINVAL;
                        }
 
-                       if (input_clk < clocks.data[0].clocks_in_khz / 100 ||
+                       if (input_clk < clocks.data[0].clocks_in_khz / 1000 ||
                            input_clk > od8_settings[OD8_SETTING_UCLK_FMAX].max_value) {
                                pr_info("clock freq %d is not within allowed range [%d - %d]\n",
                                        input_clk,
-                                       clocks.data[0].clocks_in_khz / 100,
+                                       clocks.data[0].clocks_in_khz / 1000,
                                        od8_settings[OD8_SETTING_UCLK_FMAX].max_value);
                                return -EINVAL;
                        }
@@ -2726,7 +2742,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
 
        switch (type) {
        case PP_SCLK:
-               ret = vega20_get_current_gfx_clk_freq(hwmgr, &now);
+               ret = vega20_get_current_clk_freq(hwmgr, PPCLK_GFXCLK, &now);
                PP_ASSERT_WITH_CODE(!ret,
                                "Attempt to get current gfx clk Failed!",
                                return ret);
@@ -2738,12 +2754,12 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
 
                for (i = 0; i < clocks.num_levels; i++)
                        size += sprintf(buf + size, "%d: %uMhz %s\n",
-                               i, clocks.data[i].clocks_in_khz / 100,
+                               i, clocks.data[i].clocks_in_khz / 1000,
                                (clocks.data[i].clocks_in_khz == now) ? "*" : "");
                break;
 
        case PP_MCLK:
-               ret = vega20_get_current_mclk_freq(hwmgr, &now);
+               ret = vega20_get_current_clk_freq(hwmgr, PPCLK_UCLK, &now);
                PP_ASSERT_WITH_CODE(!ret,
                                "Attempt to get current mclk freq Failed!",
                                return ret);
@@ -2755,7 +2771,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
 
                for (i = 0; i < clocks.num_levels; i++)
                        size += sprintf(buf + size, "%d: %uMhz %s\n",
-                               i, clocks.data[i].clocks_in_khz / 100,
+                               i, clocks.data[i].clocks_in_khz / 1000,
                                (clocks.data[i].clocks_in_khz == now) ? "*" : "");
                break;
 
@@ -2820,7 +2836,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
                                        return ret);
 
                        size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n",
-                               clocks.data[0].clocks_in_khz / 100,
+                               clocks.data[0].clocks_in_khz / 1000,
                                od8_settings[OD8_SETTING_UCLK_FMAX].max_value);
                }
 
@@ -3476,6 +3492,8 @@ static const struct pp_hwmgr_func vega20_hwmgr_funcs = {
                vega20_set_watermarks_for_clocks_ranges,
        .display_clock_voltage_request =
                vega20_display_clock_voltage_request,
+       .get_performance_level =
+               vega20_get_performance_level,
        /* UMD pstate, profile related */
        .force_dpm_level =
                vega20_dpm_force_dpm_level,
index 56fe6a0d42e804f956846faad473071335b3b887..25faaa5c5b10cbc5fcd720d7b15546a826641682 100644 (file)
@@ -328,6 +328,7 @@ struct vega20_registry_data {
        uint8_t   disable_auto_wattman;
        uint32_t  auto_wattman_debug;
        uint32_t  auto_wattman_sample_period;
+       uint32_t  fclk_gfxclk_ratio;
        uint8_t   auto_wattman_threshold;
        uint8_t   log_avfs_param;
        uint8_t   enable_enginess;
index e5f7f82300659f18999a29442166442be071010a..97f8a1a970c37e124c8e5b07727f7ce6e32e8849 100644 (file)
@@ -642,8 +642,14 @@ static int check_powerplay_tables(
                "Unsupported PPTable format!", return -1);
        PP_ASSERT_WITH_CODE(powerplay_table->sHeader.structuresize > 0,
                "Invalid PowerPlay Table!", return -1);
-       PP_ASSERT_WITH_CODE(powerplay_table->smcPPTable.Version == PPTABLE_V20_SMU_VERSION,
-               "Unmatch PPTable version, vbios update may be needed!", return -1);
+
+       if (powerplay_table->smcPPTable.Version != PPTABLE_V20_SMU_VERSION) {
+               pr_info("Unmatch PPTable version: "
+                       "pptable from VBIOS is V%d while driver supported is V%d!",
+                       powerplay_table->smcPPTable.Version,
+                       PPTABLE_V20_SMU_VERSION);
+               return -EINVAL;
+       }
 
        //dump_pptable(&powerplay_table->smcPPTable);
 
@@ -716,10 +722,6 @@ static int append_vbios_pptable(struct pp_hwmgr *hwmgr, PPTable_t *ppsmc_pptable
                "[appendVbiosPPTable] Failed to retrieve Smc Dpm Table from VBIOS!",
                return -1);
 
-       memset(ppsmc_pptable->Padding32,
-                       0,
-                       sizeof(struct atom_smc_dpm_info_v4_4) -
-                       sizeof(struct atom_common_table_header));
        ppsmc_pptable->MaxVoltageStepGfx = smc_dpm_table->maxvoltagestepgfx;
        ppsmc_pptable->MaxVoltageStepSoc = smc_dpm_table->maxvoltagestepsoc;
 
@@ -778,22 +780,19 @@ static int append_vbios_pptable(struct pp_hwmgr *hwmgr, PPTable_t *ppsmc_pptable
        ppsmc_pptable->FllGfxclkSpreadPercent = smc_dpm_table->fllgfxclkspreadpercent;
        ppsmc_pptable->FllGfxclkSpreadFreq = smc_dpm_table->fllgfxclkspreadfreq;
 
-       if ((smc_dpm_table->table_header.format_revision == 4) &&
-           (smc_dpm_table->table_header.content_revision == 4)) {
-               for (i = 0; i < I2C_CONTROLLER_NAME_COUNT; i++) {
-                       ppsmc_pptable->I2cControllers[i].Enabled =
-                               smc_dpm_table->i2ccontrollers[i].enabled;
-                       ppsmc_pptable->I2cControllers[i].SlaveAddress =
-                               smc_dpm_table->i2ccontrollers[i].slaveaddress;
-                       ppsmc_pptable->I2cControllers[i].ControllerPort =
-                               smc_dpm_table->i2ccontrollers[i].controllerport;
-                       ppsmc_pptable->I2cControllers[i].ThermalThrottler =
-                               smc_dpm_table->i2ccontrollers[i].thermalthrottler;
-                       ppsmc_pptable->I2cControllers[i].I2cProtocol =
-                               smc_dpm_table->i2ccontrollers[i].i2cprotocol;
-                       ppsmc_pptable->I2cControllers[i].I2cSpeed =
-                               smc_dpm_table->i2ccontrollers[i].i2cspeed;
-               }
+       for (i = 0; i < I2C_CONTROLLER_NAME_COUNT; i++) {
+               ppsmc_pptable->I2cControllers[i].Enabled =
+                       smc_dpm_table->i2ccontrollers[i].enabled;
+               ppsmc_pptable->I2cControllers[i].SlaveAddress =
+                       smc_dpm_table->i2ccontrollers[i].slaveaddress;
+               ppsmc_pptable->I2cControllers[i].ControllerPort =
+                       smc_dpm_table->i2ccontrollers[i].controllerport;
+               ppsmc_pptable->I2cControllers[i].ThermalThrottler =
+                       smc_dpm_table->i2ccontrollers[i].thermalthrottler;
+               ppsmc_pptable->I2cControllers[i].I2cProtocol =
+                       smc_dpm_table->i2ccontrollers[i].i2cprotocol;
+               ppsmc_pptable->I2cControllers[i].I2cSpeed =
+                       smc_dpm_table->i2ccontrollers[i].i2cspeed;
        }
 
        return 0;
@@ -882,15 +881,10 @@ static int init_powerplay_table_information(
        if (pptable_information->smc_pptable == NULL)
                return -ENOMEM;
 
-       if (powerplay_table->smcPPTable.Version <= 2)
-               memcpy(pptable_information->smc_pptable,
-                               &(powerplay_table->smcPPTable),
-                               sizeof(PPTable_t) -
-                               sizeof(I2cControllerConfig_t) * I2C_CONTROLLER_NAME_COUNT);
-       else
-               memcpy(pptable_information->smc_pptable,
-                               &(powerplay_table->smcPPTable),
-                               sizeof(PPTable_t));
+       memcpy(pptable_information->smc_pptable,
+                       &(powerplay_table->smcPPTable),
+                       sizeof(PPTable_t));
+
 
        result = append_vbios_pptable(hwmgr, (pptable_information->smc_pptable));
 
index 2998a49960ede1d47975675afa8c1a290d0461c9..63d5cf69154967b90aa696de2ae5c1d407bd579f 100644 (file)
@@ -29,7 +29,7 @@
 // any structure is changed in this file
 #define SMU11_DRIVER_IF_VERSION 0x12
 
-#define PPTABLE_V20_SMU_VERSION 2
+#define PPTABLE_V20_SMU_VERSION 3
 
 #define NUM_GFXCLK_DPM_LEVELS  16
 #define NUM_VCLK_DPM_LEVELS    8
index 45d64a81e94539fe403087cd2827b2992e59edca..4f63a736ea0e7371b6f09b26ea8cc55ec6b9bdd0 100644 (file)
 #define PPSMC_MSG_SetSystemVirtualDramAddrHigh   0x4B
 #define PPSMC_MSG_SetSystemVirtualDramAddrLow    0x4C
 #define PPSMC_MSG_WaflTest                       0x4D
-// Unused ID 0x4E to 0x50
+#define PPSMC_MSG_SetFclkGfxClkRatio             0x4E
+// Unused ID 0x4F to 0x50
 #define PPSMC_MSG_AllowGfxOff                    0x51
 #define PPSMC_MSG_DisallowGfxOff                 0x52
 #define PPSMC_MSG_GetPptLimit                    0x53
index f836d30fdd4428b166591676f69f588f43f852fb..09b844ec3eabae4f09f8c0d10ed84d53fcc75ee2 100644 (file)
@@ -71,7 +71,11 @@ static int smu8_send_msg_to_smc_async(struct pp_hwmgr *hwmgr, uint16_t msg)
        result = PHM_WAIT_FIELD_UNEQUAL(hwmgr,
                                        SMU_MP1_SRBM2P_RESP_0, CONTENT, 0);
        if (result != 0) {
+               /* Read the last message to SMU, to report actual cause */
+               uint32_t val = cgs_read_register(hwmgr->device,
+                                                mmSMU_MP1_SRBM2P_MSG_0);
                pr_err("smu8_send_msg_to_smc_async (0x%04x) failed\n", msg);
+               pr_err("SMU still servicing msg (0x%04x)\n", val);
                return result;
        }
 
index f8a931cf3665e8bac6a02017760c33d790d530a0..680566d97adcf652b52eb886fe54326d95817d01 100644 (file)
@@ -458,18 +458,6 @@ static void ti_sn_bridge_enable(struct drm_bridge *bridge)
        unsigned int val;
        int ret;
 
-       /*
-        * FIXME:
-        * This 70ms was found necessary by experimentation. If it's not
-        * present, link training fails. It seems like it can go anywhere from
-        * pre_enable() up to semi-auto link training initiation below.
-        *
-        * Neither the datasheet for the bridge nor the panel tested mention a
-        * delay of this magnitude in the timing requirements. So for now, add
-        * the mystery delay until someone figures out a better fix.
-        */
-       msleep(70);
-
        /* DSI_A lane config */
        val = CHA_DSI_LANES(4 - pdata->dsi->lanes);
        regmap_update_bits(pdata->regmap, SN_DSI_LANES_REG,
@@ -536,7 +524,22 @@ static void ti_sn_bridge_pre_enable(struct drm_bridge *bridge)
        /* configure bridge ref_clk */
        ti_sn_bridge_set_refclk_freq(pdata);
 
-       /* in case drm_panel is connected then HPD is not supported */
+       /*
+        * HPD on this bridge chip is a bit useless.  This is an eDP bridge
+        * so the HPD is an internal signal that's only there to signal that
+        * the panel is done powering up.  ...but the bridge chip debounces
+        * this signal by between 100 ms and 400 ms (depending on process,
+        * voltage, and temperate--I measured it at about 200 ms).  One
+        * particular panel asserted HPD 84 ms after it was powered on meaning
+        * that we saw HPD 284 ms after power on.  ...but the same panel said
+        * that instead of looking at HPD you could just hardcode a delay of
+        * 200 ms.  We'll assume that the panel driver will have the hardcoded
+        * delay in its prepare and always disable HPD.
+        *
+        * If HPD somehow makes sense on some future panel we'll have to
+        * change this to be conditional on someone specifying that HPD should
+        * be used.
+        */
        regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, HPD_DISABLE,
                           HPD_DISABLE);
 
index 701cb334e1ea314917488b6ecde68402fb462d87..d8b526b7932c3db06e1ff7cad3f2485ddfe52cee 100644 (file)
@@ -308,6 +308,26 @@ update_connector_routing(struct drm_atomic_state *state,
                return 0;
        }
 
+       crtc_state = drm_atomic_get_new_crtc_state(state,
+                                                  new_connector_state->crtc);
+       /*
+        * For compatibility with legacy users, we want to make sure that
+        * we allow DPMS On->Off modesets on unregistered connectors. Modesets
+        * which would result in anything else must be considered invalid, to
+        * avoid turning on new displays on dead connectors.
+        *
+        * Since the connector can be unregistered at any point during an
+        * atomic check or commit, this is racy. But that's OK: all we care
+        * about is ensuring that userspace can't do anything but shut off the
+        * display on a connector that was destroyed after its been notified,
+        * not before.
+        */
+       if (drm_connector_is_unregistered(connector) && crtc_state->active) {
+               DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] is not registered\n",
+                                connector->base.id, connector->name);
+               return -EINVAL;
+       }
+
        funcs = connector->helper_private;
 
        if (funcs->atomic_best_encoder)
@@ -352,7 +372,6 @@ update_connector_routing(struct drm_atomic_state *state,
 
        set_best_encoder(state, new_connector_state, new_encoder);
 
-       crtc_state = drm_atomic_get_new_crtc_state(state, new_connector_state->crtc);
        crtc_state->connectors_changed = true;
 
        DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] using [ENCODER:%d:%s] on [CRTC:%d:%s]\n",
index 1e40e5decbe91a4f709305d5259b5b7c0425e617..4943cef178beb7675ab46a0d3100e33ae836bd0e 100644 (file)
@@ -379,7 +379,8 @@ void drm_connector_cleanup(struct drm_connector *connector)
        /* The connector should have been removed from userspace long before
         * it is finally destroyed.
         */
-       if (WARN_ON(connector->registered))
+       if (WARN_ON(connector->registration_state ==
+                   DRM_CONNECTOR_REGISTERED))
                drm_connector_unregister(connector);
 
        if (connector->tile_group) {
@@ -436,7 +437,7 @@ int drm_connector_register(struct drm_connector *connector)
                return 0;
 
        mutex_lock(&connector->mutex);
-       if (connector->registered)
+       if (connector->registration_state != DRM_CONNECTOR_INITIALIZING)
                goto unlock;
 
        ret = drm_sysfs_connector_add(connector);
@@ -456,7 +457,7 @@ int drm_connector_register(struct drm_connector *connector)
 
        drm_mode_object_register(connector->dev, &connector->base);
 
-       connector->registered = true;
+       connector->registration_state = DRM_CONNECTOR_REGISTERED;
        goto unlock;
 
 err_debugfs:
@@ -478,7 +479,7 @@ EXPORT_SYMBOL(drm_connector_register);
 void drm_connector_unregister(struct drm_connector *connector)
 {
        mutex_lock(&connector->mutex);
-       if (!connector->registered) {
+       if (connector->registration_state != DRM_CONNECTOR_REGISTERED) {
                mutex_unlock(&connector->mutex);
                return;
        }
@@ -489,7 +490,7 @@ void drm_connector_unregister(struct drm_connector *connector)
        drm_sysfs_connector_remove(connector);
        drm_debugfs_connector_remove(connector);
 
-       connector->registered = false;
+       connector->registration_state = DRM_CONNECTOR_UNREGISTERED;
        mutex_unlock(&connector->mutex);
 }
 EXPORT_SYMBOL(drm_connector_unregister);
index ff0bfc65a8c1dbbbbe99ac77aeb2e122cdaf3026..b506e3622b08f64d5e1bb6d9ece2c716580883a8 100644 (file)
@@ -122,6 +122,9 @@ static const struct edid_quirk {
        /* SDC panel of Lenovo B50-80 reports 8 bpc, but is a 6 bpc panel */
        { "SDC", 0x3652, EDID_QUIRK_FORCE_6BPC },
 
+       /* BOE model 0x0771 reports 8 bpc, but is a 6 bpc panel */
+       { "BOE", 0x0771, EDID_QUIRK_FORCE_6BPC },
+
        /* Belinea 10 15 55 */
        { "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 },
        { "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 },
index e7c3ed6c9a2e10ddcd7665e851a1bffb9ff0247f..9b476368aa313efd7c33945aeadd2b4c481c70d2 100644 (file)
@@ -93,7 +93,7 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
         * If the GPU managed to complete this jobs fence, the timout is
         * spurious. Bail out.
         */
-       if (fence_completed(gpu, submit->out_fence->seqno))
+       if (dma_fence_is_signaled(submit->out_fence))
                return;
 
        /*
index 94529aa8233922b71cc36011fff305280651be53..aef487dd873153d77fd602726ab6bd92256ab593 100644 (file)
@@ -164,13 +164,6 @@ static u32 decon_get_frame_count(struct decon_context *ctx, bool end)
        return frm;
 }
 
-static u32 decon_get_vblank_counter(struct exynos_drm_crtc *crtc)
-{
-       struct decon_context *ctx = crtc->ctx;
-
-       return decon_get_frame_count(ctx, false);
-}
-
 static void decon_setup_trigger(struct decon_context *ctx)
 {
        if (!ctx->crtc->i80_mode && !(ctx->out_type & I80_HW_TRG))
@@ -536,7 +529,6 @@ static const struct exynos_drm_crtc_ops decon_crtc_ops = {
        .disable                = decon_disable,
        .enable_vblank          = decon_enable_vblank,
        .disable_vblank         = decon_disable_vblank,
-       .get_vblank_counter     = decon_get_vblank_counter,
        .atomic_begin           = decon_atomic_begin,
        .update_plane           = decon_update_plane,
        .disable_plane          = decon_disable_plane,
@@ -554,7 +546,6 @@ static int decon_bind(struct device *dev, struct device *master, void *data)
        int ret;
 
        ctx->drm_dev = drm_dev;
-       drm_dev->max_vblank_count = 0xffffffff;
 
        for (win = ctx->first_win; win < WINDOWS_NR; win++) {
                ctx->configs[win].pixel_formats = decon_formats;
index eea90251808fa2e58398fdcb1cac01d160307320..2696289ecc78f204fb504f24c4f897694acb41df 100644 (file)
@@ -162,16 +162,6 @@ static void exynos_drm_crtc_disable_vblank(struct drm_crtc *crtc)
                exynos_crtc->ops->disable_vblank(exynos_crtc);
 }
 
-static u32 exynos_drm_crtc_get_vblank_counter(struct drm_crtc *crtc)
-{
-       struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
-
-       if (exynos_crtc->ops->get_vblank_counter)
-               return exynos_crtc->ops->get_vblank_counter(exynos_crtc);
-
-       return 0;
-}
-
 static const struct drm_crtc_funcs exynos_crtc_funcs = {
        .set_config     = drm_atomic_helper_set_config,
        .page_flip      = drm_atomic_helper_page_flip,
@@ -181,7 +171,6 @@ static const struct drm_crtc_funcs exynos_crtc_funcs = {
        .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
        .enable_vblank = exynos_drm_crtc_enable_vblank,
        .disable_vblank = exynos_drm_crtc_disable_vblank,
-       .get_vblank_counter = exynos_drm_crtc_get_vblank_counter,
 };
 
 struct exynos_drm_crtc *exynos_drm_crtc_create(struct drm_device *drm_dev,
index ec9604f1272b50d12b44a9f759693dc4a8eaebf6..5e61e707f95555da181969aa71254fc71ffa4098 100644 (file)
@@ -135,7 +135,6 @@ struct exynos_drm_crtc_ops {
        void (*disable)(struct exynos_drm_crtc *crtc);
        int (*enable_vblank)(struct exynos_drm_crtc *crtc);
        void (*disable_vblank)(struct exynos_drm_crtc *crtc);
-       u32 (*get_vblank_counter)(struct exynos_drm_crtc *crtc);
        enum drm_mode_status (*mode_valid)(struct exynos_drm_crtc *crtc,
                const struct drm_display_mode *mode);
        bool (*mode_fixup)(struct exynos_drm_crtc *crtc,
index 07af7758066db47c866a86a2be8fdfe5386421a5..d81e62ae286aea79d39757ecb2233608b905d75f 100644 (file)
@@ -14,6 +14,7 @@
 
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_mipi_dsi.h>
 #include <drm/drm_panel.h>
 #include <drm/drm_atomic_helper.h>
@@ -1474,12 +1475,12 @@ static int exynos_dsi_create_connector(struct drm_encoder *encoder)
 {
        struct exynos_dsi *dsi = encoder_to_dsi(encoder);
        struct drm_connector *connector = &dsi->connector;
+       struct drm_device *drm = encoder->dev;
        int ret;
 
        connector->polled = DRM_CONNECTOR_POLL_HPD;
 
-       ret = drm_connector_init(encoder->dev, connector,
-                                &exynos_dsi_connector_funcs,
+       ret = drm_connector_init(drm, connector, &exynos_dsi_connector_funcs,
                                 DRM_MODE_CONNECTOR_DSI);
        if (ret) {
                DRM_ERROR("Failed to initialize connector with drm\n");
@@ -1489,7 +1490,12 @@ static int exynos_dsi_create_connector(struct drm_encoder *encoder)
        connector->status = connector_status_disconnected;
        drm_connector_helper_add(connector, &exynos_dsi_connector_helper_funcs);
        drm_connector_attach_encoder(connector, encoder);
+       if (!drm->registered)
+               return 0;
 
+       connector->funcs->reset(connector);
+       drm_fb_helper_add_one_connector(drm->fb_helper, connector);
+       drm_connector_register(connector);
        return 0;
 }
 
@@ -1527,7 +1533,9 @@ static int exynos_dsi_host_attach(struct mipi_dsi_host *host,
                }
 
                dsi->panel = of_drm_find_panel(device->dev.of_node);
-               if (dsi->panel) {
+               if (IS_ERR(dsi->panel)) {
+                       dsi->panel = NULL;
+               } else {
                        drm_panel_attach(dsi->panel, &dsi->connector);
                        dsi->connector.status = connector_status_connected;
                }
index 918dd2c822098444c6708761baded3ef95420025..01d182289efa38fd75a83cd399b02bbfd1e590c0 100644 (file)
@@ -192,7 +192,7 @@ int exynos_drm_fbdev_init(struct drm_device *dev)
        struct drm_fb_helper *helper;
        int ret;
 
-       if (!dev->mode_config.num_crtc || !dev->mode_config.num_connector)
+       if (!dev->mode_config.num_crtc)
                return 0;
 
        fbdev = kzalloc(sizeof(*fbdev), GFP_KERNEL);
index 2402395a068da2fc5e83ba988aa76afccfe09f03..58e166effa456426d7fe2ee4f059a0533b4bfe0c 100644 (file)
@@ -1905,7 +1905,6 @@ static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
                vgpu_free_mm(mm);
                return ERR_PTR(-ENOMEM);
        }
-       mm->ggtt_mm.last_partial_off = -1UL;
 
        return mm;
 }
@@ -1930,7 +1929,6 @@ void _intel_vgpu_mm_release(struct kref *mm_ref)
                invalidate_ppgtt_mm(mm);
        } else {
                vfree(mm->ggtt_mm.virtual_ggtt);
-               mm->ggtt_mm.last_partial_off = -1UL;
        }
 
        vgpu_free_mm(mm);
@@ -2168,6 +2166,8 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
        struct intel_gvt_gtt_entry e, m;
        dma_addr_t dma_addr;
        int ret;
+       struct intel_gvt_partial_pte *partial_pte, *pos, *n;
+       bool partial_update = false;
 
        if (bytes != 4 && bytes != 8)
                return -EINVAL;
@@ -2178,68 +2178,57 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
        if (!vgpu_gmadr_is_valid(vgpu, gma))
                return 0;
 
-       ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index);
-
+       e.type = GTT_TYPE_GGTT_PTE;
        memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
                        bytes);
 
        /* If ggtt entry size is 8 bytes, and it's split into two 4 bytes
-        * write, we assume the two 4 bytes writes are consecutive.
-        * Otherwise, we abort and report error
+        * write, save the first 4 bytes in a list and update virtual
+        * PTE. Only update shadow PTE when the second 4 bytes comes.
         */
        if (bytes < info->gtt_entry_size) {
-               if (ggtt_mm->ggtt_mm.last_partial_off == -1UL) {
-                       /* the first partial part*/
-                       ggtt_mm->ggtt_mm.last_partial_off = off;
-                       ggtt_mm->ggtt_mm.last_partial_data = e.val64;
-                       return 0;
-               } else if ((g_gtt_index ==
-                               (ggtt_mm->ggtt_mm.last_partial_off >>
-                               info->gtt_entry_size_shift)) &&
-                       (off != ggtt_mm->ggtt_mm.last_partial_off)) {
-                       /* the second partial part */
-
-                       int last_off = ggtt_mm->ggtt_mm.last_partial_off &
-                               (info->gtt_entry_size - 1);
-
-                       memcpy((void *)&e.val64 + last_off,
-                               (void *)&ggtt_mm->ggtt_mm.last_partial_data +
-                               last_off, bytes);
-
-                       ggtt_mm->ggtt_mm.last_partial_off = -1UL;
-               } else {
-                       int last_offset;
-
-                       gvt_vgpu_err("failed to populate guest ggtt entry: abnormal ggtt entry write sequence, last_partial_off=%lx, offset=%x, bytes=%d, ggtt entry size=%d\n",
-                                       ggtt_mm->ggtt_mm.last_partial_off, off,
-                                       bytes, info->gtt_entry_size);
-
-                       /* set host ggtt entry to scratch page and clear
-                        * virtual ggtt entry as not present for last
-                        * partially write offset
-                        */
-                       last_offset = ggtt_mm->ggtt_mm.last_partial_off &
-                                       (~(info->gtt_entry_size - 1));
-
-                       ggtt_get_host_entry(ggtt_mm, &m, last_offset);
-                       ggtt_invalidate_pte(vgpu, &m);
-                       ops->set_pfn(&m, gvt->gtt.scratch_mfn);
-                       ops->clear_present(&m);
-                       ggtt_set_host_entry(ggtt_mm, &m, last_offset);
-                       ggtt_invalidate(gvt->dev_priv);
-
-                       ggtt_get_guest_entry(ggtt_mm, &e, last_offset);
-                       ops->clear_present(&e);
-                       ggtt_set_guest_entry(ggtt_mm, &e, last_offset);
-
-                       ggtt_mm->ggtt_mm.last_partial_off = off;
-                       ggtt_mm->ggtt_mm.last_partial_data = e.val64;
+               bool found = false;
+
+               list_for_each_entry_safe(pos, n,
+                               &ggtt_mm->ggtt_mm.partial_pte_list, list) {
+                       if (g_gtt_index == pos->offset >>
+                                       info->gtt_entry_size_shift) {
+                               if (off != pos->offset) {
+                                       /* the second partial part*/
+                                       int last_off = pos->offset &
+                                               (info->gtt_entry_size - 1);
+
+                                       memcpy((void *)&e.val64 + last_off,
+                                               (void *)&pos->data + last_off,
+                                               bytes);
+
+                                       list_del(&pos->list);
+                                       kfree(pos);
+                                       found = true;
+                                       break;
+                               }
+
+                               /* update of the first partial part */
+                               pos->data = e.val64;
+                               ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
+                               return 0;
+                       }
+               }
 
-                       return 0;
+               if (!found) {
+                       /* the first partial part */
+                       partial_pte = kzalloc(sizeof(*partial_pte), GFP_KERNEL);
+                       if (!partial_pte)
+                               return -ENOMEM;
+                       partial_pte->offset = off;
+                       partial_pte->data = e.val64;
+                       list_add_tail(&partial_pte->list,
+                               &ggtt_mm->ggtt_mm.partial_pte_list);
+                       partial_update = true;
                }
        }
 
-       if (ops->test_present(&e)) {
+       if (!partial_update && (ops->test_present(&e))) {
                gfn = ops->get_pfn(&e);
                m = e;
 
@@ -2263,16 +2252,18 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
                } else
                        ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
        } else {
-               ggtt_get_host_entry(ggtt_mm, &m, g_gtt_index);
-               ggtt_invalidate_pte(vgpu, &m);
                ops->set_pfn(&m, gvt->gtt.scratch_mfn);
                ops->clear_present(&m);
        }
 
 out:
+       ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
+
+       ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
+       ggtt_invalidate_pte(vgpu, &e);
+
        ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
        ggtt_invalidate(gvt->dev_priv);
-       ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
        return 0;
 }
 
@@ -2430,6 +2421,8 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
 
        intel_vgpu_reset_ggtt(vgpu, false);
 
+       INIT_LIST_HEAD(&gtt->ggtt_mm->ggtt_mm.partial_pte_list);
+
        return create_scratch_page_tree(vgpu);
 }
 
@@ -2454,6 +2447,14 @@ static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
 
 static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
 {
+       struct intel_gvt_partial_pte *pos;
+
+       list_for_each_entry(pos,
+                       &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list, list) {
+               gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
+                       pos->offset, pos->data);
+               kfree(pos);
+       }
        intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
        vgpu->gtt.ggtt_mm = NULL;
 }
index 7a9b36176efb7fca7198527512f8873ad21248cb..d8cb04cc946dff3e19466ff387089db96c226d53 100644 (file)
@@ -35,7 +35,6 @@
 #define _GVT_GTT_H_
 
 #define I915_GTT_PAGE_SHIFT         12
-#define I915_GTT_PAGE_MASK             (~(I915_GTT_PAGE_SIZE - 1))
 
 struct intel_vgpu_mm;
 
@@ -133,6 +132,12 @@ enum intel_gvt_mm_type {
 
 #define GVT_RING_CTX_NR_PDPS   GEN8_3LVL_PDPES
 
+struct intel_gvt_partial_pte {
+       unsigned long offset;
+       u64 data;
+       struct list_head list;
+};
+
 struct intel_vgpu_mm {
        enum intel_gvt_mm_type type;
        struct intel_vgpu *vgpu;
@@ -157,8 +162,7 @@ struct intel_vgpu_mm {
                } ppgtt_mm;
                struct {
                        void *virtual_ggtt;
-                       unsigned long last_partial_off;
-                       u64 last_partial_data;
+                       struct list_head partial_pte_list;
                } ggtt_mm;
        };
 };
index 90f50f67909a090d72b4cee84077d0b530a4969b..aa280bb071254547fd3d810494bd488d4edbcd44 100644 (file)
@@ -1609,7 +1609,7 @@ static int bxt_gt_disp_pwron_write(struct intel_vgpu *vgpu,
        return 0;
 }
 
-static int bxt_edp_psr_imr_iir_write(struct intel_vgpu *vgpu,
+static int edp_psr_imr_iir_write(struct intel_vgpu *vgpu,
                unsigned int offset, void *p_data, unsigned int bytes)
 {
        vgpu_vreg(vgpu, offset) = 0;
@@ -2607,6 +2607,9 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
        MMIO_DFH(_MMIO(0x1a178), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
        MMIO_DFH(_MMIO(0x1a17c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
        MMIO_DFH(_MMIO(0x2217c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+
+       MMIO_DH(EDP_PSR_IMR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write);
+       MMIO_DH(EDP_PSR_IIR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write);
        return 0;
 }
 
@@ -3205,9 +3208,6 @@ static int init_bxt_mmio_info(struct intel_gvt *gvt)
        MMIO_D(HSW_TVIDEO_DIP_GCP(TRANSCODER_B), D_BXT);
        MMIO_D(HSW_TVIDEO_DIP_GCP(TRANSCODER_C), D_BXT);
 
-       MMIO_DH(EDP_PSR_IMR, D_BXT, NULL, bxt_edp_psr_imr_iir_write);
-       MMIO_DH(EDP_PSR_IIR, D_BXT, NULL, bxt_edp_psr_imr_iir_write);
-
        MMIO_D(RC6_CTX_BASE, D_BXT);
 
        MMIO_D(GEN8_PUSHBUS_CONTROL, D_BXT);
index 10e63eea5492916f676011c98ab1751e9d02dac1..36a5147cd01e5224b2c6563c29128d05688e7fba 100644 (file)
@@ -131,7 +131,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
        {RCS, GAMT_CHKN_BIT_REG, 0x0, false}, /* 0x4ab8 */
 
        {RCS, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */
-       {RCS, GEN9_CSFE_CHICKEN1_RCS, 0x0, false}, /* 0x20d4 */
+       {RCS, GEN9_CSFE_CHICKEN1_RCS, 0xffff, false}, /* 0x20d4 */
 
        {RCS, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */
        {RCS, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */
index 44e2c0f5ec502bc1a6c27007c77d56df89019ce3..ffdbbac4400eaf7d86390a3ff105a18ef36645ea 100644 (file)
@@ -1175,8 +1175,6 @@ skl_dram_get_channels_info(struct drm_i915_private *dev_priv)
                return -EINVAL;
        }
 
-       dram_info->valid_dimm = true;
-
        /*
         * If any of the channel is single rank channel, worst case output
         * will be same as if single rank memory, so consider single rank
@@ -1193,8 +1191,7 @@ skl_dram_get_channels_info(struct drm_i915_private *dev_priv)
                return -EINVAL;
        }
 
-       if (ch0.is_16gb_dimm || ch1.is_16gb_dimm)
-               dram_info->is_16gb_dimm = true;
+       dram_info->is_16gb_dimm = ch0.is_16gb_dimm || ch1.is_16gb_dimm;
 
        dev_priv->dram_info.symmetric_memory = intel_is_dram_symmetric(val_ch0,
                                                                       val_ch1,
@@ -1314,7 +1311,6 @@ bxt_get_dram_info(struct drm_i915_private *dev_priv)
                return -EINVAL;
        }
 
-       dram_info->valid_dimm = true;
        dram_info->valid = true;
        return 0;
 }
@@ -1327,12 +1323,17 @@ intel_get_dram_info(struct drm_i915_private *dev_priv)
        int ret;
 
        dram_info->valid = false;
-       dram_info->valid_dimm = false;
-       dram_info->is_16gb_dimm = false;
        dram_info->rank = I915_DRAM_RANK_INVALID;
        dram_info->bandwidth_kbps = 0;
        dram_info->num_channels = 0;
 
+       /*
+        * Assume 16Gb DIMMs are present until proven otherwise.
+        * This is only used for the level 0 watermark latency
+        * w/a which does not apply to bxt/glk.
+        */
+       dram_info->is_16gb_dimm = !IS_GEN9_LP(dev_priv);
+
        if (INTEL_GEN(dev_priv) < 9 || IS_GEMINILAKE(dev_priv))
                return;
 
index 8624b4bdc242dd7cbd77d527eb3b84fe59a0777f..9102571e9692d1540ad987ed31c4ec735dd80cf5 100644 (file)
@@ -1948,7 +1948,6 @@ struct drm_i915_private {
 
        struct dram_info {
                bool valid;
-               bool valid_dimm;
                bool is_16gb_dimm;
                u8 num_channels;
                enum dram_rank {
index 09187286d34627df882e4ede753db7e40da41934..1aaccbe7e1debd0c11440ac9acae9c15b07880d5 100644 (file)
@@ -460,7 +460,7 @@ eb_validate_vma(struct i915_execbuffer *eb,
         * any non-page-aligned or non-canonical addresses.
         */
        if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
-                    entry->offset != gen8_canonical_addr(entry->offset & PAGE_MASK)))
+                    entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK)))
                return -EINVAL;
 
        /* pad_to_size was once a reserved field, so sanitize it */
index 56c7f86373112b96212fc7952a5968854f9bdbb1..47c302543799007bee421d85039be8b016aed6e7 100644 (file)
@@ -1757,7 +1757,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *base, struct seq_file *m)
                        if (i == 4)
                                continue;
 
-                       seq_printf(m, "\t\t(%03d, %04d) %08lx: ",
+                       seq_printf(m, "\t\t(%03d, %04d) %08llx: ",
                                   pde, pte,
                                   (pde * GEN6_PTES + pte) * I915_GTT_PAGE_SIZE);
                        for (i = 0; i < 4; i++) {
index 7e2af5f4f39bcbb5ec355257d41decea7b45d019..28039290655cb7d5cf11f2c94260ba88c669e169 100644 (file)
 #include "i915_selftest.h"
 #include "i915_timeline.h"
 
-#define I915_GTT_PAGE_SIZE_4K BIT(12)
-#define I915_GTT_PAGE_SIZE_64K BIT(16)
-#define I915_GTT_PAGE_SIZE_2M BIT(21)
+#define I915_GTT_PAGE_SIZE_4K  BIT_ULL(12)
+#define I915_GTT_PAGE_SIZE_64K BIT_ULL(16)
+#define I915_GTT_PAGE_SIZE_2M  BIT_ULL(21)
 
 #define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K
 #define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M
 
+#define I915_GTT_PAGE_MASK -I915_GTT_PAGE_SIZE
+
 #define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE
 
 #define I915_FENCE_REG_NONE -1
@@ -659,20 +661,20 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
                        u64 start, u64 end, unsigned int flags);
 
 /* Flags used by pin/bind&friends. */
-#define PIN_NONBLOCK           BIT(0)
-#define PIN_MAPPABLE           BIT(1)
-#define PIN_ZONE_4G            BIT(2)
-#define PIN_NONFAULT           BIT(3)
-#define PIN_NOEVICT            BIT(4)
-
-#define PIN_MBZ                        BIT(5) /* I915_VMA_PIN_OVERFLOW */
-#define PIN_GLOBAL             BIT(6) /* I915_VMA_GLOBAL_BIND */
-#define PIN_USER               BIT(7) /* I915_VMA_LOCAL_BIND */
-#define PIN_UPDATE             BIT(8)
-
-#define PIN_HIGH               BIT(9)
-#define PIN_OFFSET_BIAS                BIT(10)
-#define PIN_OFFSET_FIXED       BIT(11)
+#define PIN_NONBLOCK           BIT_ULL(0)
+#define PIN_MAPPABLE           BIT_ULL(1)
+#define PIN_ZONE_4G            BIT_ULL(2)
+#define PIN_NONFAULT           BIT_ULL(3)
+#define PIN_NOEVICT            BIT_ULL(4)
+
+#define PIN_MBZ                        BIT_ULL(5) /* I915_VMA_PIN_OVERFLOW */
+#define PIN_GLOBAL             BIT_ULL(6) /* I915_VMA_GLOBAL_BIND */
+#define PIN_USER               BIT_ULL(7) /* I915_VMA_LOCAL_BIND */
+#define PIN_UPDATE             BIT_ULL(8)
+
+#define PIN_HIGH               BIT_ULL(9)
+#define PIN_OFFSET_BIAS                BIT_ULL(10)
+#define PIN_OFFSET_FIXED       BIT_ULL(11)
 #define PIN_OFFSET_MASK                (-I915_GTT_PAGE_SIZE)
 
 #endif
index 7c491ea3d052aaccfc5eab69e2ea8f6b31f6813e..e31c27e45734ef19ae3764b5894da3320f4b2991 100644 (file)
@@ -2095,8 +2095,12 @@ enum i915_power_well_id {
 
 /* ICL PHY DFLEX registers */
 #define PORT_TX_DFLEXDPMLE1            _MMIO(0x1638C0)
-#define   DFLEXDPMLE1_DPMLETC_MASK(n)  (0xf << (4 * (n)))
-#define   DFLEXDPMLE1_DPMLETC(n, x)    ((x) << (4 * (n)))
+#define   DFLEXDPMLE1_DPMLETC_MASK(tc_port)    (0xf << (4 * (tc_port)))
+#define   DFLEXDPMLE1_DPMLETC_ML0(tc_port)     (1 << (4 * (tc_port)))
+#define   DFLEXDPMLE1_DPMLETC_ML1_0(tc_port)   (3 << (4 * (tc_port)))
+#define   DFLEXDPMLE1_DPMLETC_ML3(tc_port)     (8 << (4 * (tc_port)))
+#define   DFLEXDPMLE1_DPMLETC_ML3_2(tc_port)   (12 << (4 * (tc_port)))
+#define   DFLEXDPMLE1_DPMLETC_ML3_0(tc_port)   (15 << (4 * (tc_port)))
 
 /* BXT PHY Ref registers */
 #define _PORT_REF_DW3_A                        0x16218C
@@ -4593,12 +4597,12 @@ enum {
 
 #define  DRM_DIP_ENABLE                        (1 << 28)
 #define  PSR_VSC_BIT_7_SET             (1 << 27)
-#define  VSC_SELECT_MASK               (0x3 << 26)
-#define  VSC_SELECT_SHIFT              26
-#define  VSC_DIP_HW_HEA_DATA           (0 << 26)
-#define  VSC_DIP_HW_HEA_SW_DATA                (1 << 26)
-#define  VSC_DIP_HW_DATA_SW_HEA                (2 << 26)
-#define  VSC_DIP_SW_HEA_DATA           (3 << 26)
+#define  VSC_SELECT_MASK               (0x3 << 25)
+#define  VSC_SELECT_SHIFT              25
+#define  VSC_DIP_HW_HEA_DATA           (0 << 25)
+#define  VSC_DIP_HW_HEA_SW_DATA                (1 << 25)
+#define  VSC_DIP_HW_DATA_SW_HEA                (2 << 25)
+#define  VSC_DIP_SW_HEA_DATA           (3 << 25)
 #define  VDIP_ENABLE_PPS               (1 << 24)
 
 /* Panel power sequencing */
index 769f3f5866611174cbabeca5e4d1fbb0711b9b86..ee3ca2de983b96ea52ffda2c794963f2b23d705d 100644 (file)
@@ -144,6 +144,9 @@ static const struct {
 /* HDMI N/CTS table */
 #define TMDS_297M 297000
 #define TMDS_296M 296703
+#define TMDS_594M 594000
+#define TMDS_593M 593407
+
 static const struct {
        int sample_rate;
        int clock;
@@ -164,6 +167,20 @@ static const struct {
        { 176400, TMDS_297M, 18816, 247500 },
        { 192000, TMDS_296M, 23296, 281250 },
        { 192000, TMDS_297M, 20480, 247500 },
+       { 44100, TMDS_593M, 8918, 937500 },
+       { 44100, TMDS_594M, 9408, 990000 },
+       { 48000, TMDS_593M, 5824, 562500 },
+       { 48000, TMDS_594M, 6144, 594000 },
+       { 32000, TMDS_593M, 5824, 843750 },
+       { 32000, TMDS_594M, 3072, 445500 },
+       { 88200, TMDS_593M, 17836, 937500 },
+       { 88200, TMDS_594M, 18816, 990000 },
+       { 96000, TMDS_593M, 11648, 562500 },
+       { 96000, TMDS_594M, 12288, 594000 },
+       { 176400, TMDS_593M, 35672, 937500 },
+       { 176400, TMDS_594M, 37632, 990000 },
+       { 192000, TMDS_593M, 23296, 562500 },
+       { 192000, TMDS_594M, 24576, 594000 },
 };
 
 /* get AUD_CONFIG_PIXEL_CLOCK_HDMI_* value for mode */
index 29075c763428055ddb3625a80b59643e694f3d76..8d74276029e621f9ae7e6db54c815be5d2d0c9d1 100644 (file)
@@ -2138,16 +2138,8 @@ void intel_set_cdclk(struct drm_i915_private *dev_priv,
 static int intel_pixel_rate_to_cdclk(struct drm_i915_private *dev_priv,
                                     int pixel_rate)
 {
-       if (INTEL_GEN(dev_priv) >= 10)
+       if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
                return DIV_ROUND_UP(pixel_rate, 2);
-       else if (IS_GEMINILAKE(dev_priv))
-               /*
-                * FIXME: Avoid using a pixel clock that is more than 99% of the cdclk
-                * as a temporary workaround. Use a higher cdclk instead. (Note that
-                * intel_compute_max_dotclk() limits the max pixel clock to 99% of max
-                * cdclk.)
-                */
-               return DIV_ROUND_UP(pixel_rate * 100, 2 * 99);
        else if (IS_GEN9(dev_priv) ||
                 IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv))
                return pixel_rate;
@@ -2543,14 +2535,8 @@ static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv)
 {
        int max_cdclk_freq = dev_priv->max_cdclk_freq;
 
-       if (INTEL_GEN(dev_priv) >= 10)
+       if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
                return 2 * max_cdclk_freq;
-       else if (IS_GEMINILAKE(dev_priv))
-               /*
-                * FIXME: Limiting to 99% as a temporary workaround. See
-                * intel_min_cdclk() for details.
-                */
-               return 2 * max_cdclk_freq * 99 / 100;
        else if (IS_GEN9(dev_priv) ||
                 IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv))
                return max_cdclk_freq;
index 9741cc419e1b2bc1f5eb4771ae75468f0099289d..23d8008a93bb690caef898ca5235eeb9fdcd670a 100644 (file)
@@ -12768,17 +12768,12 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
                        intel_check_cpu_fifo_underruns(dev_priv);
                        intel_check_pch_fifo_underruns(dev_priv);
 
-                       if (!new_crtc_state->active) {
-                               /*
-                                * Make sure we don't call initial_watermarks
-                                * for ILK-style watermark updates.
-                                *
-                                * No clue what this is supposed to achieve.
-                                */
-                               if (INTEL_GEN(dev_priv) >= 9)
-                                       dev_priv->display.initial_watermarks(intel_state,
-                                                                            to_intel_crtc_state(new_crtc_state));
-                       }
+                       /* FIXME unify this for all platforms */
+                       if (!new_crtc_state->active &&
+                           !HAS_GMCH_DISPLAY(dev_priv) &&
+                           dev_priv->display.initial_watermarks)
+                               dev_priv->display.initial_watermarks(intel_state,
+                                                                    to_intel_crtc_state(new_crtc_state));
                }
        }
 
@@ -14646,7 +14641,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
             fb->height < SKL_MIN_YUV_420_SRC_H ||
             (fb->width % 4) != 0 || (fb->height % 4) != 0)) {
                DRM_DEBUG_KMS("src dimensions not correct for NV12\n");
-               return -EINVAL;
+               goto err;
        }
 
        for (i = 0; i < fb->format->num_planes; i++) {
index 3fae4dab295f093892491ecd010970dc7fc28128..13f9b56a9ce7ca711467fc9309b720f8d9565661 100644 (file)
@@ -5102,19 +5102,13 @@ intel_dp_long_pulse(struct intel_connector *connector,
                 */
                status = connector_status_disconnected;
                goto out;
-       } else {
-               /*
-                * If display is now connected check links status,
-                * there has been known issues of link loss triggering
-                * long pulse.
-                *
-                * Some sinks (eg. ASUS PB287Q) seem to perform some
-                * weird HPD ping pong during modesets. So we can apparently
-                * end up with HPD going low during a modeset, and then
-                * going back up soon after. And once that happens we must
-                * retrain the link to get a picture. That's in case no
-                * userspace component reacted to intermittent HPD dip.
-                */
+       }
+
+       /*
+        * Some external monitors do not signal loss of link synchronization
+        * with an IRQ_HPD, so force a link status check.
+        */
+       if (!intel_dp_is_edp(intel_dp)) {
                struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
 
                intel_dp_retrain_link(encoder, ctx);
index 7f155b4f1a7d7ab9a3389181d411140951962d6e..1b00f8ea145ba3990d17f6e7142755bae8ca6a77 100644 (file)
@@ -77,7 +77,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder,
        pipe_config->pbn = mst_pbn;
 
        /* Zombie connectors can't have VCPI slots */
-       if (READ_ONCE(connector->registered)) {
+       if (!drm_connector_is_unregistered(connector)) {
                slots = drm_dp_atomic_find_vcpi_slots(state,
                                                      &intel_dp->mst_mgr,
                                                      port,
@@ -313,7 +313,7 @@ static int intel_dp_mst_get_ddc_modes(struct drm_connector *connector)
        struct edid *edid;
        int ret;
 
-       if (!READ_ONCE(connector->registered))
+       if (drm_connector_is_unregistered(connector))
                return intel_connector_update_modes(connector, NULL);
 
        edid = drm_dp_mst_get_edid(connector, &intel_dp->mst_mgr, intel_connector->port);
@@ -329,7 +329,7 @@ intel_dp_mst_detect(struct drm_connector *connector, bool force)
        struct intel_connector *intel_connector = to_intel_connector(connector);
        struct intel_dp *intel_dp = intel_connector->mst_port;
 
-       if (!READ_ONCE(connector->registered))
+       if (drm_connector_is_unregistered(connector))
                return connector_status_disconnected;
        return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr,
                                      intel_connector->port);
@@ -372,7 +372,7 @@ intel_dp_mst_mode_valid(struct drm_connector *connector,
        int bpp = 24; /* MST uses fixed bpp */
        int max_rate, mode_rate, max_lanes, max_link_clock;
 
-       if (!READ_ONCE(connector->registered))
+       if (drm_connector_is_unregistered(connector))
                return MODE_ERROR;
 
        if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
index cdf19553ffacd28f1097bb2096b8cc35d4654b84..5d5336fbe7b05836b7bedc28bffbfef9e6b08b4f 100644 (file)
@@ -297,8 +297,10 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv)
        lpe_audio_platdev_destroy(dev_priv);
 
        irq_free_desc(dev_priv->lpe_audio.irq);
-}
 
+       dev_priv->lpe_audio.irq = -1;
+       dev_priv->lpe_audio.platdev = NULL;
+}
 
 /**
  * intel_lpe_audio_notify() - notify lpe audio event
index 1db9b8328275038f93661c0e743bc0598303d25b..245f0022bcfd00c730f020b24326bfba90de4ea2 100644 (file)
@@ -2881,8 +2881,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
                 * any underrun. If not able to get Dimm info assume 16GB dimm
                 * to avoid any underrun.
                 */
-               if (!dev_priv->dram_info.valid_dimm ||
-                   dev_priv->dram_info.is_16gb_dimm)
+               if (dev_priv->dram_info.is_16gb_dimm)
                        wm[0] += 1;
 
        } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
index 8d03f64eabd71d449ebedca890135d69dabb1a74..5c22f2c8d4cfee20337d484a9eb227c3c6adcb34 100644 (file)
@@ -551,7 +551,7 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
                        err = igt_check_page_sizes(vma);
 
                        if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) {
-                               pr_err("page_sizes.gtt=%u, expected %lu\n",
+                               pr_err("page_sizes.gtt=%u, expected %llu\n",
                                       vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K);
                                err = -EINVAL;
                        }
index 8e2e269db97e82917b299afbe680fc008b8c90a1..127d8151367177dea04bf5c121b2331e46d1accc 100644 (file)
@@ -1337,7 +1337,7 @@ static int igt_gtt_reserve(void *arg)
                GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
                if (vma->node.start != total ||
                    vma->node.size != 2*I915_GTT_PAGE_SIZE) {
-                       pr_err("i915_gem_gtt_reserve (pass 1) placement failed, found (%llx + %llx), expected (%llx + %lx)\n",
+                       pr_err("i915_gem_gtt_reserve (pass 1) placement failed, found (%llx + %llx), expected (%llx + %llx)\n",
                               vma->node.start, vma->node.size,
                               total, 2*I915_GTT_PAGE_SIZE);
                        err = -EINVAL;
@@ -1386,7 +1386,7 @@ static int igt_gtt_reserve(void *arg)
                GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
                if (vma->node.start != total ||
                    vma->node.size != 2*I915_GTT_PAGE_SIZE) {
-                       pr_err("i915_gem_gtt_reserve (pass 2) placement failed, found (%llx + %llx), expected (%llx + %lx)\n",
+                       pr_err("i915_gem_gtt_reserve (pass 2) placement failed, found (%llx + %llx), expected (%llx + %llx)\n",
                               vma->node.start, vma->node.size,
                               total, 2*I915_GTT_PAGE_SIZE);
                        err = -EINVAL;
@@ -1430,7 +1430,7 @@ static int igt_gtt_reserve(void *arg)
                GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
                if (vma->node.start != offset ||
                    vma->node.size != 2*I915_GTT_PAGE_SIZE) {
-                       pr_err("i915_gem_gtt_reserve (pass 3) placement failed, found (%llx + %llx), expected (%llx + %lx)\n",
+                       pr_err("i915_gem_gtt_reserve (pass 3) placement failed, found (%llx + %llx), expected (%llx + %llx)\n",
                               vma->node.start, vma->node.size,
                               offset, 2*I915_GTT_PAGE_SIZE);
                        err = -EINVAL;
index 6bb78076b5b5830c12c0db372bbaf684dd3d1fc3..6cbbae3f438bd0e44cbc01406687ed82170b7372 100644 (file)
@@ -881,22 +881,16 @@ nv50_mstc_atomic_best_encoder(struct drm_connector *connector,
 {
        struct nv50_head *head = nv50_head(connector_state->crtc);
        struct nv50_mstc *mstc = nv50_mstc(connector);
-       if (mstc->port) {
-               struct nv50_mstm *mstm = mstc->mstm;
-               return &mstm->msto[head->base.index]->encoder;
-       }
-       return NULL;
+
+       return &mstc->mstm->msto[head->base.index]->encoder;
 }
 
 static struct drm_encoder *
 nv50_mstc_best_encoder(struct drm_connector *connector)
 {
        struct nv50_mstc *mstc = nv50_mstc(connector);
-       if (mstc->port) {
-               struct nv50_mstm *mstm = mstc->mstm;
-               return &mstm->msto[0]->encoder;
-       }
-       return NULL;
+
+       return &mstc->mstm->msto[0]->encoder;
 }
 
 static enum drm_mode_status
index 97964f7f2acee08350101947a4ccd9a717f5f199..a04ffb3b21742a834c44c4770e2760f8139053dd 100644 (file)
@@ -56,6 +56,8 @@ struct panel_desc {
        /**
         * @prepare: the time (in milliseconds) that it takes for the panel to
         *           become ready and start receiving video data
+        * @hpd_absent_delay: Add this to the prepare delay if we know Hot
+        *                    Plug Detect isn't used.
         * @enable: the time (in milliseconds) that it takes for the panel to
         *          display the first valid frame after starting to receive
         *          video data
@@ -66,6 +68,7 @@ struct panel_desc {
         */
        struct {
                unsigned int prepare;
+               unsigned int hpd_absent_delay;
                unsigned int enable;
                unsigned int disable;
                unsigned int unprepare;
@@ -79,6 +82,7 @@ struct panel_simple {
        struct drm_panel base;
        bool prepared;
        bool enabled;
+       bool no_hpd;
 
        const struct panel_desc *desc;
 
@@ -202,6 +206,7 @@ static int panel_simple_unprepare(struct drm_panel *panel)
 static int panel_simple_prepare(struct drm_panel *panel)
 {
        struct panel_simple *p = to_panel_simple(panel);
+       unsigned int delay;
        int err;
 
        if (p->prepared)
@@ -215,8 +220,11 @@ static int panel_simple_prepare(struct drm_panel *panel)
 
        gpiod_set_value_cansleep(p->enable_gpio, 1);
 
-       if (p->desc->delay.prepare)
-               msleep(p->desc->delay.prepare);
+       delay = p->desc->delay.prepare;
+       if (p->no_hpd)
+               delay += p->desc->delay.hpd_absent_delay;
+       if (delay)
+               msleep(delay);
 
        p->prepared = true;
 
@@ -305,6 +313,8 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc)
        panel->prepared = false;
        panel->desc = desc;
 
+       panel->no_hpd = of_property_read_bool(dev->of_node, "no-hpd");
+
        panel->supply = devm_regulator_get(dev, "power");
        if (IS_ERR(panel->supply))
                return PTR_ERR(panel->supply);
@@ -1363,7 +1373,7 @@ static const struct panel_desc innolux_n156bge_l21 = {
        },
 };
 
-static const struct drm_display_mode innolux_tv123wam_mode = {
+static const struct drm_display_mode innolux_p120zdg_bf1_mode = {
        .clock = 206016,
        .hdisplay = 2160,
        .hsync_start = 2160 + 48,
@@ -1377,15 +1387,16 @@ static const struct drm_display_mode innolux_tv123wam_mode = {
        .flags = DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC,
 };
 
-static const struct panel_desc innolux_tv123wam = {
-       .modes = &innolux_tv123wam_mode,
+static const struct panel_desc innolux_p120zdg_bf1 = {
+       .modes = &innolux_p120zdg_bf1_mode,
        .num_modes = 1,
        .bpc = 8,
        .size = {
-               .width = 259,
-               .height = 173,
+               .width = 254,
+               .height = 169,
        },
        .delay = {
+               .hpd_absent_delay = 200,
                .unprepare = 500,
        },
 };
@@ -2445,8 +2456,8 @@ static const struct of_device_id platform_of_match[] = {
                .compatible = "innolux,n156bge-l21",
                .data = &innolux_n156bge_l21,
        }, {
-               .compatible = "innolux,tv123wam",
-               .data = &innolux_tv123wam,
+               .compatible = "innolux,p120zdg-bf1",
+               .data = &innolux_p120zdg_bf1,
        }, {
                .compatible = "innolux,zj070na-01p",
                .data = &innolux_zj070na_01p,
index af7dcb6da351408391892dff43c9fc574291ae50..e7eb0d1e17be5e6575550f9e6c12d9b96f92a0d5 100644 (file)
@@ -75,7 +75,7 @@ static void sun4i_lvds_encoder_enable(struct drm_encoder *encoder)
 
        DRM_DEBUG_DRIVER("Enabling LVDS output\n");
 
-       if (!IS_ERR(tcon->panel)) {
+       if (tcon->panel) {
                drm_panel_prepare(tcon->panel);
                drm_panel_enable(tcon->panel);
        }
@@ -88,7 +88,7 @@ static void sun4i_lvds_encoder_disable(struct drm_encoder *encoder)
 
        DRM_DEBUG_DRIVER("Disabling LVDS output\n");
 
-       if (!IS_ERR(tcon->panel)) {
+       if (tcon->panel) {
                drm_panel_disable(tcon->panel);
                drm_panel_unprepare(tcon->panel);
        }
index bf068da6b12e11b7a9440fdca6ff03ca84ff2479..f4a22689eb54c238f96626c03d8271d70ce645b8 100644 (file)
@@ -135,7 +135,7 @@ static void sun4i_rgb_encoder_enable(struct drm_encoder *encoder)
 
        DRM_DEBUG_DRIVER("Enabling RGB output\n");
 
-       if (!IS_ERR(tcon->panel)) {
+       if (tcon->panel) {
                drm_panel_prepare(tcon->panel);
                drm_panel_enable(tcon->panel);
        }
@@ -148,7 +148,7 @@ static void sun4i_rgb_encoder_disable(struct drm_encoder *encoder)
 
        DRM_DEBUG_DRIVER("Disabling RGB output\n");
 
-       if (!IS_ERR(tcon->panel)) {
+       if (tcon->panel) {
                drm_panel_disable(tcon->panel);
                drm_panel_unprepare(tcon->panel);
        }
index c78cd35a1294b215f84b33031ac7d826760b885a..f949287d926cd07f6859331c769ef02621ef130f 100644 (file)
@@ -491,7 +491,8 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
        sun4i_tcon0_mode_set_common(tcon, mode);
 
        /* Set dithering if needed */
-       sun4i_tcon0_mode_set_dithering(tcon, tcon->panel->connector);
+       if (tcon->panel)
+               sun4i_tcon0_mode_set_dithering(tcon, tcon->panel->connector);
 
        /* Adjust clock delay */
        clk_delay = sun4i_tcon_get_clk_delay(mode, 0);
@@ -555,7 +556,7 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
         * Following code is a way to avoid quirks all around TCON
         * and DOTCLOCK drivers.
         */
-       if (!IS_ERR(tcon->panel)) {
+       if (tcon->panel) {
                struct drm_panel *panel = tcon->panel;
                struct drm_connector *connector = panel->connector;
                struct drm_display_info display_info = connector->display_info;
index cf2a18571d484d078dc1eabc59a3d6ff0f11ab07..a132c37d733490fa70af2674237162d73459a31d 100644 (file)
@@ -380,6 +380,9 @@ int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
                        mutex_unlock(&vgasr_mutex);
                        return -EINVAL;
                }
+               /* notify if GPU has been already bound */
+               if (ops->gpu_bound)
+                       ops->gpu_bound(pdev, id);
        }
        mutex_unlock(&vgasr_mutex);
 
index 5ed319e3b084d920cddf6adc5a665c5732c8c77c..41e9935fc5849e0992e3103912fbe01a5af2a555 100644 (file)
@@ -149,6 +149,7 @@ config HID_APPLEIR
 config HID_ASUS
        tristate "Asus"
        depends on LEDS_CLASS
+       depends on ASUS_WMI || ASUS_WMI=n
        ---help---
        Support for Asus notebook built-in keyboard and touchpad via i2c, and
        the Asus Republic of Gamers laptop keyboard special keys.
index aec253b44156891bf71c192c1cd1e41b2e5801a3..3cd7229b6e5465b88759d42a9a12084cd948eeab 100644 (file)
@@ -660,6 +660,20 @@ exit:
        return ret;
 }
 
+static int alps_sp_open(struct input_dev *dev)
+{
+       struct hid_device *hid = input_get_drvdata(dev);
+
+       return hid_hw_open(hid);
+}
+
+static void alps_sp_close(struct input_dev *dev)
+{
+       struct hid_device *hid = input_get_drvdata(dev);
+
+       hid_hw_close(hid);
+}
+
 static int alps_input_configured(struct hid_device *hdev, struct hid_input *hi)
 {
        struct alps_dev *data = hid_get_drvdata(hdev);
@@ -733,6 +747,10 @@ static int alps_input_configured(struct hid_device *hdev, struct hid_input *hi)
                input2->id.version = input->id.version;
                input2->dev.parent = input->dev.parent;
 
+               input_set_drvdata(input2, hdev);
+               input2->open = alps_sp_open;
+               input2->close = alps_sp_close;
+
                __set_bit(EV_KEY, input2->evbit);
                data->sp_btn_cnt = (data->sp_btn_info & 0x0F);
                for (i = 0; i < data->sp_btn_cnt; i++)
index 88a5672f42cd814187ede87e8783414023824bf2..a1fa2fc8c9b57fd8e3de462d35b6247bd0d3e6e3 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/dmi.h>
 #include <linux/hid.h>
 #include <linux/module.h>
+#include <linux/platform_data/x86/asus-wmi.h>
 #include <linux/input/mt.h>
 #include <linux/usb.h> /* For to_usb_interface for T100 touchpad intf check */
 
@@ -349,6 +350,27 @@ static void asus_kbd_backlight_work(struct work_struct *work)
                hid_err(led->hdev, "Asus failed to set keyboard backlight: %d\n", ret);
 }
 
+/* WMI-based keyboard backlight LED control (via asus-wmi driver) takes
+ * precedence. We only activate HID-based backlight control when the
+ * WMI control is not available.
+ */
+static bool asus_kbd_wmi_led_control_present(struct hid_device *hdev)
+{
+       u32 value;
+       int ret;
+
+       if (!IS_ENABLED(CONFIG_ASUS_WMI))
+               return false;
+
+       ret = asus_wmi_evaluate_method(ASUS_WMI_METHODID_DSTS2,
+                                      ASUS_WMI_DEVID_KBD_BACKLIGHT, 0, &value);
+       hid_dbg(hdev, "WMI backlight check: rc %d value %x", ret, value);
+       if (ret)
+               return false;
+
+       return !!(value & ASUS_WMI_DSTS_PRESENCE_BIT);
+}
+
 static int asus_kbd_register_leds(struct hid_device *hdev)
 {
        struct asus_drvdata *drvdata = hid_get_drvdata(hdev);
@@ -436,7 +458,9 @@ static int asus_input_configured(struct hid_device *hdev, struct hid_input *hi)
 
        drvdata->input = input;
 
-       if (drvdata->enable_backlight && asus_kbd_register_leds(hdev))
+       if (drvdata->enable_backlight &&
+           !asus_kbd_wmi_led_control_present(hdev) &&
+           asus_kbd_register_leds(hdev))
                hid_warn(hdev, "Failed to initialize backlight.\n");
 
        return 0;
index f63489c882bb64f98f4c5bf9c84cbeb9df192c71..c0d668944dbe876260a926c0125f4ec1ab4afc52 100644 (file)
 #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3003                0x3003
 #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008                0x3008
 
+#define I2C_VENDOR_ID_RAYDIUM          0x2386
+#define I2C_PRODUCT_ID_RAYDIUM_4B33    0x4b33
+
 #define USB_VENDOR_ID_RAZER            0x1532
 #define USB_DEVICE_ID_RAZER_BLADE_14   0x011D
 
index 52c3b01917e7236821b804a35711b1e034da4788..8237dd86fb17fa9157fd4c97b6da734e34af9733 100644 (file)
@@ -107,7 +107,6 @@ static const struct hid_device_id hid_quirks[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C05A), HID_QUIRK_ALWAYS_POLL },
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C06A), HID_QUIRK_ALWAYS_POLL },
        { HID_USB_DEVICE(USB_VENDOR_ID_MCS, USB_DEVICE_ID_MCS_GAMEPADBLOCK), HID_QUIRK_MULTI_INPUT },
-       { HID_USB_DEVICE(USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS), HID_QUIRK_NOGET },
        { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER), HID_QUIRK_NO_INIT_REPORTS },
        { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE_PRO_2), HID_QUIRK_NO_INIT_REPORTS },
        { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TOUCH_COVER_2), HID_QUIRK_NO_INIT_REPORTS },
index 4aab96cf081861f863a008cda0a080425fdf31fa..3cde7c1b9c33cd673858d5089ed097e724fb12b0 100644 (file)
@@ -49,6 +49,7 @@
 #define I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV       BIT(0)
 #define I2C_HID_QUIRK_NO_IRQ_AFTER_RESET       BIT(1)
 #define I2C_HID_QUIRK_NO_RUNTIME_PM            BIT(2)
+#define I2C_HID_QUIRK_DELAY_AFTER_SLEEP                BIT(3)
 
 /* flags */
 #define I2C_HID_STARTED                0
@@ -158,6 +159,8 @@ struct i2c_hid {
 
        bool                    irq_wake_enabled;
        struct mutex            reset_lock;
+
+       unsigned long           sleep_delay;
 };
 
 static const struct i2c_hid_quirks {
@@ -172,6 +175,8 @@ static const struct i2c_hid_quirks {
        { I2C_VENDOR_ID_HANTICK, I2C_PRODUCT_ID_HANTICK_5288,
                I2C_HID_QUIRK_NO_IRQ_AFTER_RESET |
                I2C_HID_QUIRK_NO_RUNTIME_PM },
+       { I2C_VENDOR_ID_RAYDIUM, I2C_PRODUCT_ID_RAYDIUM_4B33,
+               I2C_HID_QUIRK_DELAY_AFTER_SLEEP },
        { 0, 0 }
 };
 
@@ -387,6 +392,7 @@ static int i2c_hid_set_power(struct i2c_client *client, int power_state)
 {
        struct i2c_hid *ihid = i2c_get_clientdata(client);
        int ret;
+       unsigned long now, delay;
 
        i2c_hid_dbg(ihid, "%s\n", __func__);
 
@@ -404,9 +410,22 @@ static int i2c_hid_set_power(struct i2c_client *client, int power_state)
                        goto set_pwr_exit;
        }
 
+       if (ihid->quirks & I2C_HID_QUIRK_DELAY_AFTER_SLEEP &&
+           power_state == I2C_HID_PWR_ON) {
+               now = jiffies;
+               if (time_after(ihid->sleep_delay, now)) {
+                       delay = jiffies_to_usecs(ihid->sleep_delay - now);
+                       usleep_range(delay, delay + 1);
+               }
+       }
+
        ret = __i2c_hid_command(client, &hid_set_power_cmd, power_state,
                0, NULL, 0, NULL, 0);
 
+       if (ihid->quirks & I2C_HID_QUIRK_DELAY_AFTER_SLEEP &&
+           power_state == I2C_HID_PWR_SLEEP)
+               ihid->sleep_delay = jiffies + msecs_to_jiffies(20);
+
        if (ret)
                dev_err(&client->dev, "failed to change power setting.\n");
 
index cac262a912c1248747d2814fa9e3b3d3512f8c76..89f2976f9c534c475da40c3933d2775e46787ae9 100644 (file)
@@ -330,6 +330,14 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = {
                },
                .driver_data = (void *)&sipodev_desc
        },
+       {
+               .ident = "Direkt-Tek DTLAPY133-1",
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Direkt-Tek"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "DTLAPY133-1"),
+               },
+               .driver_data = (void *)&sipodev_desc
+       },
        {
                .ident = "Mediacom Flexbook Edge 11",
                .matches = {
index 23872d08308cdb5857d53b5bcdf907e20d74c345..a746017fac170ca15895435fd4df4fbe3a04d51f 100644 (file)
@@ -512,14 +512,24 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd,
                        if (cmd == HIDIOCGCOLLECTIONINDEX) {
                                if (uref->usage_index >= field->maxusage)
                                        goto inval;
+                               uref->usage_index =
+                                       array_index_nospec(uref->usage_index,
+                                                          field->maxusage);
                        } else if (uref->usage_index >= field->report_count)
                                goto inval;
                }
 
-               if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) &&
-                   (uref_multi->num_values > HID_MAX_MULTI_USAGES ||
-                    uref->usage_index + uref_multi->num_values > field->report_count))
-                       goto inval;
+               if (cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) {
+                       if (uref_multi->num_values > HID_MAX_MULTI_USAGES ||
+                           uref->usage_index + uref_multi->num_values >
+                           field->report_count)
+                               goto inval;
+
+                       uref->usage_index =
+                               array_index_nospec(uref->usage_index,
+                                                  field->report_count -
+                                                  uref_multi->num_values);
+               }
 
                switch (cmd) {
                case HIDIOCGUSAGE:
index 975c951698846bffb8ae98ec4a70759e60a1f11b..84f61cec6319c8eb65ccc80553cbbdfde0542cc9 100644 (file)
@@ -649,8 +649,10 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
                                if (info[i]->config[j] & HWMON_T_INPUT) {
                                        err = hwmon_thermal_add_sensor(dev,
                                                                hwdev, j);
-                                       if (err)
-                                               goto free_device;
+                                       if (err) {
+                                               device_unregister(hdev);
+                                               goto ida_remove;
+                                       }
                                }
                        }
                }
@@ -658,8 +660,6 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
 
        return hdev;
 
-free_device:
-       device_unregister(hdev);
 free_hwmon:
        kfree(hwdev);
 ida_remove:
index 0ccca87f527191dc000649d1a0b1eaf44c87d35b..293dd1c6c7b36ef2b0770cf76e465aaea22b4673 100644 (file)
@@ -181,7 +181,7 @@ static ssize_t show_label(struct device *dev, struct device_attribute *devattr,
        return sprintf(buf, "%s\n", sdata->label);
 }
 
-static int __init get_logical_cpu(int hwcpu)
+static int get_logical_cpu(int hwcpu)
 {
        int cpu;
 
@@ -192,9 +192,8 @@ static int __init get_logical_cpu(int hwcpu)
        return -ENOENT;
 }
 
-static void __init make_sensor_label(struct device_node *np,
-                                    struct sensor_data *sdata,
-                                    const char *label)
+static void make_sensor_label(struct device_node *np,
+                             struct sensor_data *sdata, const char *label)
 {
        u32 id;
        size_t n;
index 56ccb1ea7da5b405e904d90ac38b17303a3d2faa..f2c6819712013046246002346af928bd1ab16bc0 100644 (file)
@@ -224,6 +224,15 @@ config I2C_NFORCE2_S4985
          This driver can also be built as a module.  If so, the module
          will be called i2c-nforce2-s4985.
 
+config I2C_NVIDIA_GPU
+       tristate "NVIDIA GPU I2C controller"
+       depends on PCI
+       help
+         If you say yes to this option, support will be included for the
+         NVIDIA GPU I2C controller which is used to communicate with the GPU's
+         Type-C controller. This driver can also be built as a module called
+         i2c-nvidia-gpu.
+
 config I2C_SIS5595
        tristate "SiS 5595"
        depends on PCI
@@ -752,7 +761,7 @@ config I2C_OCORES
 
 config I2C_OMAP
        tristate "OMAP I2C adapter"
-       depends on ARCH_OMAP
+       depends on ARCH_OMAP || ARCH_K3
        default y if MACH_OMAP_H3 || MACH_OMAP_OSK
        help
          If you say yes to this option, support will be included for the
index 18b26af82b1c5425a9dcec9c61cca3cdff694d60..5f0cb6915969aa98d5722b02e0fe9cb9a1ae25a7 100644 (file)
@@ -19,6 +19,7 @@ obj-$(CONFIG_I2C_ISCH)                += i2c-isch.o
 obj-$(CONFIG_I2C_ISMT)         += i2c-ismt.o
 obj-$(CONFIG_I2C_NFORCE2)      += i2c-nforce2.o
 obj-$(CONFIG_I2C_NFORCE2_S4985)        += i2c-nforce2-s4985.o
+obj-$(CONFIG_I2C_NVIDIA_GPU)   += i2c-nvidia-gpu.o
 obj-$(CONFIG_I2C_PIIX4)                += i2c-piix4.o
 obj-$(CONFIG_I2C_SIS5595)      += i2c-sis5595.o
 obj-$(CONFIG_I2C_SIS630)       += i2c-sis630.o
diff --git a/drivers/i2c/busses/i2c-nvidia-gpu.c b/drivers/i2c/busses/i2c-nvidia-gpu.c
new file mode 100644 (file)
index 0000000..8822357
--- /dev/null
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Nvidia GPU I2C controller Driver
+ *
+ * Copyright (C) 2018 NVIDIA Corporation. All rights reserved.
+ * Author: Ajay Gupta <ajayg@nvidia.com>
+ */
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+
+#include <asm/unaligned.h>
+
+/* I2C definitions */
+#define I2C_MST_CNTL                           0x00
+#define I2C_MST_CNTL_GEN_START                 BIT(0)
+#define I2C_MST_CNTL_GEN_STOP                  BIT(1)
+#define I2C_MST_CNTL_CMD_READ                  (1 << 2)
+#define I2C_MST_CNTL_CMD_WRITE                 (2 << 2)
+#define I2C_MST_CNTL_BURST_SIZE_SHIFT          6
+#define I2C_MST_CNTL_GEN_NACK                  BIT(28)
+#define I2C_MST_CNTL_STATUS                    GENMASK(30, 29)
+#define I2C_MST_CNTL_STATUS_OKAY               (0 << 29)
+#define I2C_MST_CNTL_STATUS_NO_ACK             (1 << 29)
+#define I2C_MST_CNTL_STATUS_TIMEOUT            (2 << 29)
+#define I2C_MST_CNTL_STATUS_BUS_BUSY           (3 << 29)
+#define I2C_MST_CNTL_CYCLE_TRIGGER             BIT(31)
+
+#define I2C_MST_ADDR                           0x04
+
+#define I2C_MST_I2C0_TIMING                            0x08
+#define I2C_MST_I2C0_TIMING_SCL_PERIOD_100KHZ          0x10e
+#define I2C_MST_I2C0_TIMING_TIMEOUT_CLK_CNT            16
+#define I2C_MST_I2C0_TIMING_TIMEOUT_CLK_CNT_MAX                255
+#define I2C_MST_I2C0_TIMING_TIMEOUT_CHECK              BIT(24)
+
+#define I2C_MST_DATA                                   0x0c
+
+#define I2C_MST_HYBRID_PADCTL                          0x20
+#define I2C_MST_HYBRID_PADCTL_MODE_I2C                 BIT(0)
+#define I2C_MST_HYBRID_PADCTL_I2C_SCL_INPUT_RCV                BIT(14)
+#define I2C_MST_HYBRID_PADCTL_I2C_SDA_INPUT_RCV                BIT(15)
+
+struct gpu_i2c_dev {
+       struct device *dev;
+       void __iomem *regs;
+       struct i2c_adapter adapter;
+       struct i2c_board_info *gpu_ccgx_ucsi;
+};
+
+static void gpu_enable_i2c_bus(struct gpu_i2c_dev *i2cd)
+{
+       u32 val;
+
+       /* enable I2C */
+       val = readl(i2cd->regs + I2C_MST_HYBRID_PADCTL);
+       val |= I2C_MST_HYBRID_PADCTL_MODE_I2C |
+               I2C_MST_HYBRID_PADCTL_I2C_SCL_INPUT_RCV |
+               I2C_MST_HYBRID_PADCTL_I2C_SDA_INPUT_RCV;
+       writel(val, i2cd->regs + I2C_MST_HYBRID_PADCTL);
+
+       /* enable 100KHZ mode */
+       val = I2C_MST_I2C0_TIMING_SCL_PERIOD_100KHZ;
+       val |= (I2C_MST_I2C0_TIMING_TIMEOUT_CLK_CNT_MAX
+           << I2C_MST_I2C0_TIMING_TIMEOUT_CLK_CNT);
+       val |= I2C_MST_I2C0_TIMING_TIMEOUT_CHECK;
+       writel(val, i2cd->regs + I2C_MST_I2C0_TIMING);
+}
+
+static int gpu_i2c_check_status(struct gpu_i2c_dev *i2cd)
+{
+       unsigned long target = jiffies + msecs_to_jiffies(1000);
+       u32 val;
+
+       do {
+               val = readl(i2cd->regs + I2C_MST_CNTL);
+               if (!(val & I2C_MST_CNTL_CYCLE_TRIGGER))
+                       break;
+               if ((val & I2C_MST_CNTL_STATUS) !=
+                               I2C_MST_CNTL_STATUS_BUS_BUSY)
+                       break;
+               usleep_range(500, 600);
+       } while (time_is_after_jiffies(target));
+
+       if (time_is_before_jiffies(target)) {
+               dev_err(i2cd->dev, "i2c timeout error %x\n", val);
+               return -ETIME;
+       }
+
+       val = readl(i2cd->regs + I2C_MST_CNTL);
+       switch (val & I2C_MST_CNTL_STATUS) {
+       case I2C_MST_CNTL_STATUS_OKAY:
+               return 0;
+       case I2C_MST_CNTL_STATUS_NO_ACK:
+               return -EIO;
+       case I2C_MST_CNTL_STATUS_TIMEOUT:
+               return -ETIME;
+       default:
+               return 0;
+       }
+}
+
+static int gpu_i2c_read(struct gpu_i2c_dev *i2cd, u8 *data, u16 len)
+{
+       int status;
+       u32 val;
+
+       val = I2C_MST_CNTL_GEN_START | I2C_MST_CNTL_CMD_READ |
+               (len << I2C_MST_CNTL_BURST_SIZE_SHIFT) |
+               I2C_MST_CNTL_CYCLE_TRIGGER | I2C_MST_CNTL_GEN_NACK;
+       writel(val, i2cd->regs + I2C_MST_CNTL);
+
+       status = gpu_i2c_check_status(i2cd);
+       if (status < 0)
+               return status;
+
+       val = readl(i2cd->regs + I2C_MST_DATA);
+       switch (len) {
+       case 1:
+               data[0] = val;
+               break;
+       case 2:
+               put_unaligned_be16(val, data);
+               break;
+       case 3:
+               put_unaligned_be16(val >> 8, data);
+               data[2] = val;
+               break;
+       case 4:
+               put_unaligned_be32(val, data);
+               break;
+       default:
+               break;
+       }
+       return status;
+}
+
+static int gpu_i2c_start(struct gpu_i2c_dev *i2cd)
+{
+       writel(I2C_MST_CNTL_GEN_START, i2cd->regs + I2C_MST_CNTL);
+       return gpu_i2c_check_status(i2cd);
+}
+
+static int gpu_i2c_stop(struct gpu_i2c_dev *i2cd)
+{
+       writel(I2C_MST_CNTL_GEN_STOP, i2cd->regs + I2C_MST_CNTL);
+       return gpu_i2c_check_status(i2cd);
+}
+
+static int gpu_i2c_write(struct gpu_i2c_dev *i2cd, u8 data)
+{
+       u32 val;
+
+       writel(data, i2cd->regs + I2C_MST_DATA);
+
+       val = I2C_MST_CNTL_CMD_WRITE | (1 << I2C_MST_CNTL_BURST_SIZE_SHIFT);
+       writel(val, i2cd->regs + I2C_MST_CNTL);
+
+       return gpu_i2c_check_status(i2cd);
+}
+
+static int gpu_i2c_master_xfer(struct i2c_adapter *adap,
+                              struct i2c_msg *msgs, int num)
+{
+       struct gpu_i2c_dev *i2cd = i2c_get_adapdata(adap);
+       int status, status2;
+       int i, j;
+
+       /*
+        * The controller supports maximum 4 byte read due to known
+        * limitation of sending STOP after every read.
+        */
+       for (i = 0; i < num; i++) {
+               if (msgs[i].flags & I2C_M_RD) {
+                       /* program client address before starting read */
+                       writel(msgs[i].addr, i2cd->regs + I2C_MST_ADDR);
+                       /* gpu_i2c_read has implicit start */
+                       status = gpu_i2c_read(i2cd, msgs[i].buf, msgs[i].len);
+                       if (status < 0)
+                               goto stop;
+               } else {
+                       u8 addr = i2c_8bit_addr_from_msg(msgs + i);
+
+                       status = gpu_i2c_start(i2cd);
+                       if (status < 0) {
+                               if (i == 0)
+                                       return status;
+                               goto stop;
+                       }
+
+                       status = gpu_i2c_write(i2cd, addr);
+                       if (status < 0)
+                               goto stop;
+
+                       for (j = 0; j < msgs[i].len; j++) {
+                               status = gpu_i2c_write(i2cd, msgs[i].buf[j]);
+                               if (status < 0)
+                                       goto stop;
+                       }
+               }
+       }
+       status = gpu_i2c_stop(i2cd);
+       if (status < 0)
+               return status;
+
+       return i;
+stop:
+       status2 = gpu_i2c_stop(i2cd);
+       if (status2 < 0)
+               dev_err(i2cd->dev, "i2c stop failed %d\n", status2);
+       return status;
+}
+
+static const struct i2c_adapter_quirks gpu_i2c_quirks = {
+       .max_read_len = 4,
+       .flags = I2C_AQ_COMB_WRITE_THEN_READ,
+};
+
+static u32 gpu_i2c_functionality(struct i2c_adapter *adap)
+{
+       return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+}
+
+static const struct i2c_algorithm gpu_i2c_algorithm = {
+       .master_xfer    = gpu_i2c_master_xfer,
+       .functionality  = gpu_i2c_functionality,
+};
+
+/*
+ * This driver is for Nvidia GPU cards with USB Type-C interface.
+ * We want to identify the cards using vendor ID and class code only
+ * to avoid dependency of adding product id for any new card which
+ * requires this driver.
+ * Currently there is no class code defined for UCSI device over PCI
+ * so using UNKNOWN class for now and it will be updated when UCSI
+ * over PCI gets a class code.
+ * There is no other NVIDIA cards with UNKNOWN class code. Even if the
+ * driver gets loaded for an undesired card then eventually i2c_read()
+ * (initiated from UCSI i2c_client) will timeout or UCSI commands will
+ * timeout.
+ */
+#define PCI_CLASS_SERIAL_UNKNOWN       0x0c80
+static const struct pci_device_id gpu_i2c_ids[] = {
+       { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
+               PCI_CLASS_SERIAL_UNKNOWN << 8, 0xffffff00},
+       { }
+};
+MODULE_DEVICE_TABLE(pci, gpu_i2c_ids);
+
+static int gpu_populate_client(struct gpu_i2c_dev *i2cd, int irq)
+{
+       struct i2c_client *ccgx_client;
+
+       i2cd->gpu_ccgx_ucsi = devm_kzalloc(i2cd->dev,
+                                          sizeof(*i2cd->gpu_ccgx_ucsi),
+                                          GFP_KERNEL);
+       if (!i2cd->gpu_ccgx_ucsi)
+               return -ENOMEM;
+
+       strlcpy(i2cd->gpu_ccgx_ucsi->type, "ccgx-ucsi",
+               sizeof(i2cd->gpu_ccgx_ucsi->type));
+       i2cd->gpu_ccgx_ucsi->addr = 0x8;
+       i2cd->gpu_ccgx_ucsi->irq = irq;
+       ccgx_client = i2c_new_device(&i2cd->adapter, i2cd->gpu_ccgx_ucsi);
+       if (!ccgx_client)
+               return -ENODEV;
+
+       return 0;
+}
+
+static int gpu_i2c_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+       struct gpu_i2c_dev *i2cd;
+       int status;
+
+       i2cd = devm_kzalloc(&pdev->dev, sizeof(*i2cd), GFP_KERNEL);
+       if (!i2cd)
+               return -ENOMEM;
+
+       i2cd->dev = &pdev->dev;
+       dev_set_drvdata(&pdev->dev, i2cd);
+
+       status = pcim_enable_device(pdev);
+       if (status < 0) {
+               dev_err(&pdev->dev, "pcim_enable_device failed %d\n", status);
+               return status;
+       }
+
+       pci_set_master(pdev);
+
+       i2cd->regs = pcim_iomap(pdev, 0, 0);
+       if (!i2cd->regs) {
+               dev_err(&pdev->dev, "pcim_iomap failed\n");
+               return -ENOMEM;
+       }
+
+       status = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+       if (status < 0) {
+               dev_err(&pdev->dev, "pci_alloc_irq_vectors err %d\n", status);
+               return status;
+       }
+
+       gpu_enable_i2c_bus(i2cd);
+
+       i2c_set_adapdata(&i2cd->adapter, i2cd);
+       i2cd->adapter.owner = THIS_MODULE;
+       strlcpy(i2cd->adapter.name, "NVIDIA GPU I2C adapter",
+               sizeof(i2cd->adapter.name));
+       i2cd->adapter.algo = &gpu_i2c_algorithm;
+       i2cd->adapter.quirks = &gpu_i2c_quirks;
+       i2cd->adapter.dev.parent = &pdev->dev;
+       status = i2c_add_adapter(&i2cd->adapter);
+       if (status < 0)
+               goto free_irq_vectors;
+
+       status = gpu_populate_client(i2cd, pdev->irq);
+       if (status < 0) {
+               dev_err(&pdev->dev, "gpu_populate_client failed %d\n", status);
+               goto del_adapter;
+       }
+
+       return 0;
+
+del_adapter:
+       i2c_del_adapter(&i2cd->adapter);
+free_irq_vectors:
+       pci_free_irq_vectors(pdev);
+       return status;
+}
+
+static void gpu_i2c_remove(struct pci_dev *pdev)
+{
+       struct gpu_i2c_dev *i2cd = dev_get_drvdata(&pdev->dev);
+
+       i2c_del_adapter(&i2cd->adapter);
+       pci_free_irq_vectors(pdev);
+}
+
+static int gpu_i2c_resume(struct device *dev)
+{
+       struct gpu_i2c_dev *i2cd = dev_get_drvdata(dev);
+
+       gpu_enable_i2c_bus(i2cd);
+       return 0;
+}
+
+static UNIVERSAL_DEV_PM_OPS(gpu_i2c_driver_pm, NULL, gpu_i2c_resume, NULL);
+
+static struct pci_driver gpu_i2c_driver = {
+       .name           = "nvidia-gpu",
+       .id_table       = gpu_i2c_ids,
+       .probe          = gpu_i2c_probe,
+       .remove         = gpu_i2c_remove,
+       .driver         = {
+               .pm     = &gpu_i2c_driver_pm,
+       },
+};
+
+module_pci_driver(gpu_i2c_driver);
+
+MODULE_AUTHOR("Ajay Gupta <ajayg@nvidia.com>");
+MODULE_DESCRIPTION("Nvidia GPU I2C controller Driver");
+MODULE_LICENSE("GPL v2");
index 527f55c8c4c70e560a9787a610c68017fbb10235..db075bc0d9525d62a7b366abd7c6eb1edeaaa76c 100644 (file)
@@ -571,18 +571,19 @@ static int geni_i2c_probe(struct platform_device *pdev)
 
        dev_dbg(&pdev->dev, "i2c fifo/se-dma mode. fifo depth:%d\n", tx_depth);
 
-       ret = i2c_add_adapter(&gi2c->adap);
-       if (ret) {
-               dev_err(&pdev->dev, "Error adding i2c adapter %d\n", ret);
-               return ret;
-       }
-
        gi2c->suspended = 1;
        pm_runtime_set_suspended(gi2c->se.dev);
        pm_runtime_set_autosuspend_delay(gi2c->se.dev, I2C_AUTO_SUSPEND_DELAY);
        pm_runtime_use_autosuspend(gi2c->se.dev);
        pm_runtime_enable(gi2c->se.dev);
 
+       ret = i2c_add_adapter(&gi2c->adap);
+       if (ret) {
+               dev_err(&pdev->dev, "Error adding i2c adapter %d\n", ret);
+               pm_runtime_disable(gi2c->se.dev);
+               return ret;
+       }
+
        return 0;
 }
 
@@ -590,8 +591,8 @@ static int geni_i2c_remove(struct platform_device *pdev)
 {
        struct geni_i2c_dev *gi2c = platform_get_drvdata(pdev);
 
-       pm_runtime_disable(gi2c->se.dev);
        i2c_del_adapter(&gi2c->adap);
+       pm_runtime_disable(gi2c->se.dev);
        return 0;
 }
 
index dc78aa7369def416898f0a4c514fd017c147f0c0..28460f6a60cc15220c9a8748b3bd688e81244c37 100644 (file)
@@ -306,10 +306,7 @@ static int i2c_smbus_host_notify_to_irq(const struct i2c_client *client)
        if (client->flags & I2C_CLIENT_TEN)
                return -EINVAL;
 
-       irq = irq_find_mapping(adap->host_notify_domain, client->addr);
-       if (!irq)
-               irq = irq_create_mapping(adap->host_notify_domain,
-                                        client->addr);
+       irq = irq_create_mapping(adap->host_notify_domain, client->addr);
 
        return irq > 0 ? irq : -ENXIO;
 }
@@ -433,6 +430,8 @@ static int i2c_device_remove(struct device *dev)
        dev_pm_clear_wake_irq(&client->dev);
        device_init_wakeup(&client->dev, false);
 
+       client->irq = 0;
+
        return status;
 }
 
index 566d69a2edbc723b8370483a39bf66ca60aaef62..add4c9c934c8abda564b25904dc7b9f479ca7afc 100644 (file)
@@ -384,9 +384,9 @@ static int mvebu_sei_probe(struct platform_device *pdev)
 
        sei->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        sei->base = devm_ioremap_resource(sei->dev, sei->res);
-       if (!sei->base) {
+       if (IS_ERR(sei->base)) {
                dev_err(sei->dev, "Failed to remap SEI resource\n");
-               return -ENODEV;
+               return PTR_ERR(sei->base);
        }
 
        /* Retrieve the SEI capabilities with the interrupt ranges */
index b05022f94f18c22ff2c72fa71cf11984077152b7..072bb5e36c184e6945624ec07dc27595fe8e7a7d 100644 (file)
@@ -718,8 +718,7 @@ l1oip_socket_thread(void *data)
                printk(KERN_DEBUG "%s: socket created and open\n",
                       __func__);
        while (!signal_pending(current)) {
-               iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1,
-                               recvbuf_size);
+               iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, recvbuf_size);
                recvlen = sock_recvmsg(socket, &msg, 0);
                if (recvlen > 0) {
                        l1oip_socket_parse(hc, &sin_rx, recvbuf, recvlen);
index ce7acd115dd8da7578b4fc8d3fc3733692d90743..1870cf87afe1ef7993b6e2cda9a28aeede31f268 100644 (file)
@@ -75,8 +75,6 @@ static void pattern_trig_timer_function(struct timer_list *t)
 {
        struct pattern_trig_data *data = from_timer(data, t, timer);
 
-       mutex_lock(&data->lock);
-
        for (;;) {
                if (!data->is_indefinite && !data->repeat)
                        break;
@@ -87,9 +85,10 @@ static void pattern_trig_timer_function(struct timer_list *t)
                                           data->curr->brightness);
                        mod_timer(&data->timer,
                                  jiffies + msecs_to_jiffies(data->curr->delta_t));
-
-                       /* Skip the tuple with zero duration */
-                       pattern_trig_update_patterns(data);
+                       if (!data->next->delta_t) {
+                               /* Skip the tuple with zero duration */
+                               pattern_trig_update_patterns(data);
+                       }
                        /* Select next tuple */
                        pattern_trig_update_patterns(data);
                } else {
@@ -116,8 +115,6 @@ static void pattern_trig_timer_function(struct timer_list *t)
 
                break;
        }
-
-       mutex_unlock(&data->lock);
 }
 
 static int pattern_trig_start_pattern(struct led_classdev *led_cdev)
@@ -176,14 +173,10 @@ static ssize_t repeat_store(struct device *dev, struct device_attribute *attr,
        if (res < -1 || res == 0)
                return -EINVAL;
 
-       /*
-        * Clear previous patterns' performence firstly, and remove the timer
-        * without mutex lock to avoid dead lock.
-        */
-       del_timer_sync(&data->timer);
-
        mutex_lock(&data->lock);
 
+       del_timer_sync(&data->timer);
+
        if (data->is_hw_pattern)
                led_cdev->pattern_clear(led_cdev);
 
@@ -234,14 +227,10 @@ static ssize_t pattern_trig_store_patterns(struct led_classdev *led_cdev,
        struct pattern_trig_data *data = led_cdev->trigger_data;
        int ccount, cr, offset = 0, err = 0;
 
-       /*
-        * Clear previous patterns' performence firstly, and remove the timer
-        * without mutex lock to avoid dead lock.
-        */
-       del_timer_sync(&data->timer);
-
        mutex_lock(&data->lock);
 
+       del_timer_sync(&data->timer);
+
        if (data->is_hw_pattern)
                led_cdev->pattern_clear(led_cdev);
 
index f3fb5bb8c82a1cfe861aef1af22b581709815817..ac1cffd2a09b05f5f5217e579c9e87ea80efce84 100644 (file)
@@ -542,7 +542,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
                    !discard_bio)
                        continue;
                bio_chain(discard_bio, bio);
-               bio_clone_blkg_association(discard_bio, bio);
+               bio_clone_blkcg_association(discard_bio, bio);
                if (mddev->gendisk)
                        trace_block_bio_remap(bdev_get_queue(rdev->bdev),
                                discard_bio, disk_devt(mddev->gendisk),
index 3370a4138e942621a008573b1fa1ad6912b1c75c..951c984de61ae9e85af22eb05e76199c5752baa6 100644 (file)
@@ -8,7 +8,9 @@ lkdtm-$(CONFIG_LKDTM)           += perms.o
 lkdtm-$(CONFIG_LKDTM)          += refcount.o
 lkdtm-$(CONFIG_LKDTM)          += rodata_objcopy.o
 lkdtm-$(CONFIG_LKDTM)          += usercopy.o
+lkdtm-$(CONFIG_LKDTM)          += stackleak.o
 
+KASAN_SANITIZE_stackleak.o     := n
 KCOV_INSTRUMENT_rodata.o       := n
 
 OBJCOPYFLAGS :=
index 5a755590d3dcefe85b0354c88bacc9dffd2e392d..2837dc77478ed43e9a8561c850c92879f010b07d 100644 (file)
@@ -184,6 +184,7 @@ static const struct crashtype crashtypes[] = {
        CRASHTYPE(USERCOPY_STACK_BEYOND),
        CRASHTYPE(USERCOPY_KERNEL),
        CRASHTYPE(USERCOPY_KERNEL_DS),
+       CRASHTYPE(STACKLEAK_ERASING),
 };
 
 
index 07db641d71d023bd2eb710873114261170cce079..3c6fd327e166a4c83dfef10f8d83209cd4f6f892 100644 (file)
@@ -84,4 +84,7 @@ void lkdtm_USERCOPY_STACK_BEYOND(void);
 void lkdtm_USERCOPY_KERNEL(void);
 void lkdtm_USERCOPY_KERNEL_DS(void);
 
+/* lkdtm_stackleak.c */
+void lkdtm_STACKLEAK_ERASING(void);
+
 #endif
diff --git a/drivers/misc/lkdtm/stackleak.c b/drivers/misc/lkdtm/stackleak.c
new file mode 100644 (file)
index 0000000..d5a0844
--- /dev/null
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This code tests that the current task stack is properly erased (filled
+ * with STACKLEAK_POISON).
+ *
+ * Authors:
+ *   Alexander Popov <alex.popov@linux.com>
+ *   Tycho Andersen <tycho@tycho.ws>
+ */
+
+#include "lkdtm.h"
+#include <linux/stackleak.h>
+
+void lkdtm_STACKLEAK_ERASING(void)
+{
+       unsigned long *sp, left, found, i;
+       const unsigned long check_depth =
+                       STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
+
+       /*
+        * For the details about the alignment of the poison values, see
+        * the comment in stackleak_track_stack().
+        */
+       sp = PTR_ALIGN(&i, sizeof(unsigned long));
+
+       left = ((unsigned long)sp & (THREAD_SIZE - 1)) / sizeof(unsigned long);
+       sp--;
+
+       /*
+        * One 'long int' at the bottom of the thread stack is reserved
+        * and not poisoned.
+        */
+       if (left > 1) {
+               left--;
+       } else {
+               pr_err("FAIL: not enough stack space for the test\n");
+               return;
+       }
+
+       pr_info("checking unused part of the thread stack (%lu bytes)...\n",
+                                       left * sizeof(unsigned long));
+
+       /*
+        * Search for 'check_depth' poison values in a row (just like
+        * stackleak_erase() does).
+        */
+       for (i = 0, found = 0; i < left && found <= check_depth; i++) {
+               if (*(sp - i) == STACKLEAK_POISON)
+                       found++;
+               else
+                       found = 0;
+       }
+
+       if (found <= check_depth) {
+               pr_err("FAIL: thread stack is not erased (checked %lu bytes)\n",
+                                               i * sizeof(unsigned long));
+               return;
+       }
+
+       pr_info("first %lu bytes are unpoisoned\n",
+                               (i - found) * sizeof(unsigned long));
+
+       /* The rest of thread stack should be erased */
+       for (; i < left; i++) {
+               if (*(sp - i) != STACKLEAK_POISON) {
+                       pr_err("FAIL: thread stack is NOT properly erased\n");
+                       return;
+               }
+       }
+
+       pr_info("OK: the rest of the thread stack is properly erased\n");
+       return;
+}
index bd52f29b4a4e273eb6f69a56c34a0789279d2226..264f4ed8eef26e8f839d121d905497c3c75de67c 100644 (file)
@@ -3030,7 +3030,7 @@ ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair,
        if (!qpair || !buf)
                return VMCI_ERROR_INVALID_ARGS;
 
-       iov_iter_kvec(&from, WRITE | ITER_KVEC, &v, 1, buf_size);
+       iov_iter_kvec(&from, WRITE, &v, 1, buf_size);
 
        qp_lock(qpair);
 
@@ -3074,7 +3074,7 @@ ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair,
        if (!qpair || !buf)
                return VMCI_ERROR_INVALID_ARGS;
 
-       iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size);
+       iov_iter_kvec(&to, READ, &v, 1, buf_size);
 
        qp_lock(qpair);
 
@@ -3119,7 +3119,7 @@ ssize_t vmci_qpair_peek(struct vmci_qp *qpair,
        if (!qpair || !buf)
                return VMCI_ERROR_INVALID_ARGS;
 
-       iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size);
+       iov_iter_kvec(&to, READ, &v, 1, buf_size);
 
        qp_lock(qpair);
 
index e514d57a0419defecb8dcbbc8be4604aea1321da..aa983422aa970f1035201a1a4841b7a09d9acc3f 100644 (file)
@@ -207,7 +207,7 @@ comment "Disk-On-Chip Device Drivers"
 config MTD_DOCG3
        tristate "M-Systems Disk-On-Chip G3"
        select BCH
-       select BCH_CONST_PARAMS
+       select BCH_CONST_PARAMS if !MTD_NAND_BCH
        select BITREVERSE
        help
          This provides an MTD device driver for the M-Systems DiskOnChip
index 784c6e1a0391e92c90723e698d8bc148fe3e4916..fd5fe12d74613ecebddb88699dcae5e1862d3829 100644 (file)
@@ -221,7 +221,14 @@ static struct sa_info *sa1100_setup_mtd(struct platform_device *pdev,
                info->mtd = info->subdev[0].mtd;
                ret = 0;
        } else if (info->num_subdev > 1) {
-               struct mtd_info *cdev[nr];
+               struct mtd_info **cdev;
+
+               cdev = kmalloc_array(nr, sizeof(*cdev), GFP_KERNEL);
+               if (!cdev) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+
                /*
                 * We detected multiple devices.  Concatenate them together.
                 */
@@ -230,6 +237,7 @@ static struct sa_info *sa1100_setup_mtd(struct platform_device *pdev,
 
                info->mtd = mtd_concat_create(cdev, info->num_subdev,
                                              plat->name);
+               kfree(cdev);
                if (info->mtd == NULL) {
                        ret = -ENXIO;
                        goto err;
index 05bd0779fe9bf7eae08acca31b7ba30f7592b9b1..71050a0b31dfe3b6bf273ff4c240e7cfe307080f 100644 (file)
@@ -590,7 +590,6 @@ retry:
 
 /**
  * panic_nand_wait - [GENERIC] wait until the command is done
- * @mtd: MTD device structure
  * @chip: NAND chip structure
  * @timeo: timeout
  *
index e24db817154ee73ad1fc0fd9586f4e294fc2886a..d846428ef038e6b76f84591f73d40e9a4d30db33 100644 (file)
@@ -996,7 +996,7 @@ static int cqspi_direct_read_execute(struct spi_nor *nor, u_char *buf,
 err_unmap:
        dma_unmap_single(nor->dev, dma_dst, len, DMA_FROM_DEVICE);
 
-       return 0;
+       return ret;
 }
 
 static ssize_t cqspi_read(struct spi_nor *nor, loff_t from,
index 9407ca5f9443338d56a355fe0eadfa6641a429dc..3e54e31889c7b53bbba362a5bd497279dc91b128 100644 (file)
@@ -3250,12 +3250,14 @@ static int spi_nor_init_params(struct spi_nor *nor,
                memcpy(&sfdp_params, params, sizeof(sfdp_params));
                memcpy(&prev_map, &nor->erase_map, sizeof(prev_map));
 
-               if (spi_nor_parse_sfdp(nor, &sfdp_params))
+               if (spi_nor_parse_sfdp(nor, &sfdp_params)) {
+                       nor->addr_width = 0;
                        /* restore previous erase map */
                        memcpy(&nor->erase_map, &prev_map,
                               sizeof(nor->erase_map));
-               else
+               } else {
                        memcpy(params, &sfdp_params, sizeof(*params));
+               }
        }
 
        return 0;
index 93ceea4f27d5731f865c57be71f69c35b46d274f..e294d3986ba964d07bc9bcb67f800b4d479d5231 100644 (file)
@@ -1072,6 +1072,7 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
                         * be a result of power cut during erasure.
                         */
                        ai->maybe_bad_peb_count += 1;
+               /* fall through */
        case UBI_IO_BAD_HDR:
                        /*
                         * If we're facing a bad VID header we have to drop *all*
index d2a726654ff1182e961f22ff365dc461996b7204..a4e3454133a47eacdcc61034c11b6c98d831e9c8 100644 (file)
@@ -1334,8 +1334,10 @@ static int bytes_str_to_int(const char *str)
        switch (*endp) {
        case 'G':
                result *= 1024;
+               /* fall through */
        case 'M':
                result *= 1024;
+               /* fall through */
        case 'K':
                result *= 1024;
                if (endp[1] == 'i' && endp[2] == 'B')
index ffa37adb76817f454505b32d010056dfc4d20dc8..333387f1f1fe66490cda8904a7d6c7aeb2d15287 100644 (file)
@@ -3112,13 +3112,13 @@ static int bond_slave_netdev_event(unsigned long event,
        case NETDEV_CHANGE:
                /* For 802.3ad mode only:
                 * Getting invalid Speed/Duplex values here will put slave
-                * in weird state. So mark it as link-down for the time
+                * in weird state. So mark it as link-fail for the time
                 * being and let link-monitoring (miimon) set it right when
                 * correct speeds/duplex are available.
                 */
                if (bond_update_speed_duplex(slave) &&
                    BOND_MODE(bond) == BOND_MODE_8023AD)
-                       slave->link = BOND_LINK_DOWN;
+                       slave->link = BOND_LINK_FAIL;
 
                if (BOND_MODE(bond) == BOND_MODE_8023AD)
                        bond_3ad_adapter_speed_duplex_changed(slave);
index 9697977b80f040c3fde59037e2caabe0cdfaa79f..6b9ad86732188c19c87f26a6639d63f95893f139 100644 (file)
@@ -638,8 +638,7 @@ static int bond_fill_info(struct sk_buff *skb,
                                goto nla_put_failure;
 
                        if (nla_put(skb, IFLA_BOND_AD_ACTOR_SYSTEM,
-                                   sizeof(bond->params.ad_actor_system),
-                                   &bond->params.ad_actor_system))
+                                   ETH_ALEN, &bond->params.ad_actor_system))
                                goto nla_put_failure;
                }
                if (!bond_3ad_get_active_agg_info(bond, &info)) {
index 54e0ca6ed7308c511ce42bc6ea3dc6e65fb0662b..86b6464b4525c426e09d4d6a9f98bf9a0ee49111 100644 (file)
@@ -1117,11 +1117,6 @@ static int ksz_switch_init(struct ksz_device *dev)
 {
        int i;
 
-       mutex_init(&dev->reg_mutex);
-       mutex_init(&dev->stats_mutex);
-       mutex_init(&dev->alu_mutex);
-       mutex_init(&dev->vlan_mutex);
-
        dev->ds->ops = &ksz_switch_ops;
 
        for (i = 0; i < ARRAY_SIZE(ksz_switch_chips); i++) {
@@ -1206,6 +1201,11 @@ int ksz_switch_register(struct ksz_device *dev)
        if (dev->pdata)
                dev->chip_id = dev->pdata->chip_id;
 
+       mutex_init(&dev->reg_mutex);
+       mutex_init(&dev->stats_mutex);
+       mutex_init(&dev->alu_mutex);
+       mutex_init(&dev->vlan_mutex);
+
        if (ksz_switch_detect(dev))
                return -EINVAL;
 
index 78c5de467426f1e4276cebe8ee81cc0091d4c6fa..9d0e74f6b089df4c304ab49c2749d8bbba47c2e7 100644 (file)
@@ -140,6 +140,5 @@ struct alx_priv {
 };
 
 extern const struct ethtool_ops alx_ethtool_ops;
-extern const char alx_drv_name[];
 
 #endif
index 7968c644ad8617fef2fec1360e869a622c525a02..c131cfc1b79df5a62e048bbf1d15d070e7c0fced 100644 (file)
@@ -49,7 +49,7 @@
 #include "hw.h"
 #include "reg.h"
 
-const char alx_drv_name[] = "alx";
+static const char alx_drv_name[] = "alx";
 
 static void alx_free_txbuf(struct alx_tx_queue *txq, int entry)
 {
index 4122553e224b294d4eff1828201e467fcc5a60b9..0e2d99c737e35192b90d0bf3ce541ef2d6ecd4d1 100644 (file)
@@ -1902,9 +1902,6 @@ static void bcm_sysport_netif_start(struct net_device *dev)
                intrl2_1_mask_clear(priv, 0xffffffff);
        else
                intrl2_0_mask_clear(priv, INTRL2_0_TDMA_MBDONE_MASK);
-
-       /* Last call before we start the real business */
-       netif_tx_start_all_queues(dev);
 }
 
 static void rbuf_init(struct bcm_sysport_priv *priv)
@@ -2048,6 +2045,8 @@ static int bcm_sysport_open(struct net_device *dev)
 
        bcm_sysport_netif_start(dev);
 
+       netif_tx_start_all_queues(dev);
+
        return 0;
 
 out_clear_rx_int:
@@ -2071,7 +2070,7 @@ static void bcm_sysport_netif_stop(struct net_device *dev)
        struct bcm_sysport_priv *priv = netdev_priv(dev);
 
        /* stop all software from updating hardware */
-       netif_tx_stop_all_queues(dev);
+       netif_tx_disable(dev);
        napi_disable(&priv->napi);
        cancel_work_sync(&priv->dim.dim.work);
        phy_stop(dev->phydev);
@@ -2658,12 +2657,12 @@ static int __maybe_unused bcm_sysport_suspend(struct device *d)
        if (!netif_running(dev))
                return 0;
 
+       netif_device_detach(dev);
+
        bcm_sysport_netif_stop(dev);
 
        phy_suspend(dev->phydev);
 
-       netif_device_detach(dev);
-
        /* Disable UniMAC RX */
        umac_enable_set(priv, CMD_RX_EN, 0);
 
@@ -2746,8 +2745,6 @@ static int __maybe_unused bcm_sysport_resume(struct device *d)
                goto out_free_rx_ring;
        }
 
-       netif_device_attach(dev);
-
        /* RX pipe enable */
        topctrl_writel(priv, 0, RX_FLUSH_CNTL);
 
@@ -2788,6 +2785,8 @@ static int __maybe_unused bcm_sysport_resume(struct device *d)
 
        bcm_sysport_netif_start(dev);
 
+       netif_device_attach(dev);
+
        return 0;
 
 out_free_rx_ring:
index 20c1681bb1afeea35e23f20242abc0fe34fd1304..2d6f090bf6440cc7253fe4f0764b10bde618ff73 100644 (file)
@@ -2855,7 +2855,6 @@ static void bcmgenet_netif_start(struct net_device *dev)
 
        umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, true);
 
-       netif_tx_start_all_queues(dev);
        bcmgenet_enable_tx_napi(priv);
 
        /* Monitor link interrupts now */
@@ -2937,6 +2936,8 @@ static int bcmgenet_open(struct net_device *dev)
 
        bcmgenet_netif_start(dev);
 
+       netif_tx_start_all_queues(dev);
+
        return 0;
 
 err_irq1:
@@ -2958,7 +2959,7 @@ static void bcmgenet_netif_stop(struct net_device *dev)
        struct bcmgenet_priv *priv = netdev_priv(dev);
 
        bcmgenet_disable_tx_napi(priv);
-       netif_tx_stop_all_queues(dev);
+       netif_tx_disable(dev);
 
        /* Disable MAC receive */
        umac_enable_set(priv, CMD_RX_EN, false);
@@ -3620,13 +3621,13 @@ static int bcmgenet_suspend(struct device *d)
        if (!netif_running(dev))
                return 0;
 
+       netif_device_detach(dev);
+
        bcmgenet_netif_stop(dev);
 
        if (!device_may_wakeup(d))
                phy_suspend(dev->phydev);
 
-       netif_device_detach(dev);
-
        /* Prepare the device for Wake-on-LAN and switch to the slow clock */
        if (device_may_wakeup(d) && priv->wolopts) {
                ret = bcmgenet_power_down(priv, GENET_POWER_WOL_MAGIC);
@@ -3700,8 +3701,6 @@ static int bcmgenet_resume(struct device *d)
        /* Always enable ring 16 - descriptor ring */
        bcmgenet_enable_dma(priv, dma_ctrl);
 
-       netif_device_attach(dev);
-
        if (!device_may_wakeup(d))
                phy_resume(dev->phydev);
 
@@ -3710,6 +3709,8 @@ static int bcmgenet_resume(struct device *d)
 
        bcmgenet_netif_start(dev);
 
+       netif_device_attach(dev);
+
        return 0;
 
 out_clk_disable:
index e82e4ca206205da6562fa3449802869a975a86d3..055b40606dbc20f358f6445067b33f226d79554e 100644 (file)
@@ -316,8 +316,8 @@ struct hnae3_ae_ops {
        int (*set_loopback)(struct hnae3_handle *handle,
                            enum hnae3_loop loop_mode, bool en);
 
-       void (*set_promisc_mode)(struct hnae3_handle *handle, bool en_uc_pmc,
-                                bool en_mc_pmc);
+       int (*set_promisc_mode)(struct hnae3_handle *handle, bool en_uc_pmc,
+                               bool en_mc_pmc);
        int (*set_mtu)(struct hnae3_handle *handle, int new_mtu);
 
        void (*get_pauseparam)(struct hnae3_handle *handle,
@@ -391,7 +391,7 @@ struct hnae3_ae_ops {
                                      int vector_num,
                                      struct hnae3_ring_chain_node *vr_chain);
 
-       void (*reset_queue)(struct hnae3_handle *handle, u16 queue_id);
+       int (*reset_queue)(struct hnae3_handle *handle, u16 queue_id);
        u32 (*get_fw_version)(struct hnae3_handle *handle);
        void (*get_mdix_mode)(struct hnae3_handle *handle,
                              u8 *tp_mdix_ctrl, u8 *tp_mdix);
index 32f3aca814e78b530495956d7cafcb9017139176..3f96aa30068ec3dcf991bec4ed022b3e68d013e7 100644 (file)
@@ -509,16 +509,18 @@ static void hns3_nic_set_rx_mode(struct net_device *netdev)
        h->netdev_flags = new_flags;
 }
 
-void hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags)
+int hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags)
 {
        struct hns3_nic_priv *priv = netdev_priv(netdev);
        struct hnae3_handle *h = priv->ae_handle;
 
        if (h->ae_algo->ops->set_promisc_mode) {
-               h->ae_algo->ops->set_promisc_mode(h,
-                                                 promisc_flags & HNAE3_UPE,
-                                                 promisc_flags & HNAE3_MPE);
+               return h->ae_algo->ops->set_promisc_mode(h,
+                                               promisc_flags & HNAE3_UPE,
+                                               promisc_flags & HNAE3_MPE);
        }
+
+       return 0;
 }
 
 void hns3_enable_vlan_filter(struct net_device *netdev, bool enable)
@@ -1494,18 +1496,22 @@ static int hns3_vlan_rx_kill_vid(struct net_device *netdev,
        return ret;
 }
 
-static void hns3_restore_vlan(struct net_device *netdev)
+static int hns3_restore_vlan(struct net_device *netdev)
 {
        struct hns3_nic_priv *priv = netdev_priv(netdev);
+       int ret = 0;
        u16 vid;
-       int ret;
 
        for_each_set_bit(vid, priv->active_vlans, VLAN_N_VID) {
                ret = hns3_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), vid);
-               if (ret)
-                       netdev_warn(netdev, "Restore vlan: %d filter, ret:%d\n",
-                                   vid, ret);
+               if (ret) {
+                       netdev_err(netdev, "Restore vlan: %d filter, ret:%d\n",
+                                  vid, ret);
+                       return ret;
+               }
        }
+
+       return ret;
 }
 
 static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
@@ -2727,7 +2733,7 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
                        chain = devm_kzalloc(&pdev->dev, sizeof(*chain),
                                             GFP_KERNEL);
                        if (!chain)
-                               return -ENOMEM;
+                               goto err_free_chain;
 
                        cur_chain->next = chain;
                        chain->tqp_index = tx_ring->tqp->tqp_index;
@@ -2757,7 +2763,7 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
        while (rx_ring) {
                chain = devm_kzalloc(&pdev->dev, sizeof(*chain), GFP_KERNEL);
                if (!chain)
-                       return -ENOMEM;
+                       goto err_free_chain;
 
                cur_chain->next = chain;
                chain->tqp_index = rx_ring->tqp->tqp_index;
@@ -2772,6 +2778,16 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
        }
 
        return 0;
+
+err_free_chain:
+       cur_chain = head->next;
+       while (cur_chain) {
+               chain = cur_chain->next;
+               devm_kfree(&pdev->dev, chain);
+               cur_chain = chain;
+       }
+
+       return -ENOMEM;
 }
 
 static void hns3_free_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
@@ -2821,7 +2837,7 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
        struct hnae3_handle *h = priv->ae_handle;
        struct hns3_enet_tqp_vector *tqp_vector;
        int ret = 0;
-       u16 i;
+       int i;
 
        hns3_nic_set_cpumask(priv);
 
@@ -2868,13 +2884,19 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
                hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
 
                if (ret)
-                       return ret;
+                       goto map_ring_fail;
 
                netif_napi_add(priv->netdev, &tqp_vector->napi,
                               hns3_nic_common_poll, NAPI_POLL_WEIGHT);
        }
 
        return 0;
+
+map_ring_fail:
+       while (i--)
+               netif_napi_del(&priv->tqp_vector[i].napi);
+
+       return ret;
 }
 
 static int hns3_nic_alloc_vector_data(struct hns3_nic_priv *priv)
@@ -3031,8 +3053,10 @@ static int hns3_queue_to_ring(struct hnae3_queue *tqp,
                return ret;
 
        ret = hns3_ring_get_cfg(tqp, priv, HNAE3_RING_TYPE_RX);
-       if (ret)
+       if (ret) {
+               devm_kfree(priv->dev, priv->ring_data[tqp->tqp_index].ring);
                return ret;
+       }
 
        return 0;
 }
@@ -3059,6 +3083,12 @@ static int hns3_get_ring_config(struct hns3_nic_priv *priv)
 
        return 0;
 err:
+       while (i--) {
+               devm_kfree(priv->dev, priv->ring_data[i].ring);
+               devm_kfree(priv->dev,
+                          priv->ring_data[i + h->kinfo.num_tqps].ring);
+       }
+
        devm_kfree(&pdev->dev, priv->ring_data);
        return ret;
 }
@@ -3226,9 +3256,6 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
        int i;
 
        for (i = 0; i < h->kinfo.num_tqps; i++) {
-               if (h->ae_algo->ops->reset_queue)
-                       h->ae_algo->ops->reset_queue(h, i);
-
                hns3_fini_ring(priv->ring_data[i].ring);
                hns3_fini_ring(priv->ring_data[i + h->kinfo.num_tqps].ring);
        }
@@ -3236,11 +3263,12 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
 }
 
 /* Set mac addr if it is configured. or leave it to the AE driver */
-static void hns3_init_mac_addr(struct net_device *netdev, bool init)
+static int hns3_init_mac_addr(struct net_device *netdev, bool init)
 {
        struct hns3_nic_priv *priv = netdev_priv(netdev);
        struct hnae3_handle *h = priv->ae_handle;
        u8 mac_addr_temp[ETH_ALEN];
+       int ret = 0;
 
        if (h->ae_algo->ops->get_mac_addr && init) {
                h->ae_algo->ops->get_mac_addr(h, mac_addr_temp);
@@ -3255,8 +3283,9 @@ static void hns3_init_mac_addr(struct net_device *netdev, bool init)
        }
 
        if (h->ae_algo->ops->set_mac_addr)
-               h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr, true);
+               ret = h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr, true);
 
+       return ret;
 }
 
 static int hns3_restore_fd_rules(struct net_device *netdev)
@@ -3469,20 +3498,29 @@ err_out:
        return ret;
 }
 
-static void hns3_recover_hw_addr(struct net_device *ndev)
+static int hns3_recover_hw_addr(struct net_device *ndev)
 {
        struct netdev_hw_addr_list *list;
        struct netdev_hw_addr *ha, *tmp;
+       int ret = 0;
 
        /* go through and sync uc_addr entries to the device */
        list = &ndev->uc;
-       list_for_each_entry_safe(ha, tmp, &list->list, list)
-               hns3_nic_uc_sync(ndev, ha->addr);
+       list_for_each_entry_safe(ha, tmp, &list->list, list) {
+               ret = hns3_nic_uc_sync(ndev, ha->addr);
+               if (ret)
+                       return ret;
+       }
 
        /* go through and sync mc_addr entries to the device */
        list = &ndev->mc;
-       list_for_each_entry_safe(ha, tmp, &list->list, list)
-               hns3_nic_mc_sync(ndev, ha->addr);
+       list_for_each_entry_safe(ha, tmp, &list->list, list) {
+               ret = hns3_nic_mc_sync(ndev, ha->addr);
+               if (ret)
+                       return ret;
+       }
+
+       return ret;
 }
 
 static void hns3_remove_hw_addr(struct net_device *netdev)
@@ -3609,7 +3647,10 @@ int hns3_nic_reset_all_ring(struct hnae3_handle *h)
        int ret;
 
        for (i = 0; i < h->kinfo.num_tqps; i++) {
-               h->ae_algo->ops->reset_queue(h, i);
+               ret = h->ae_algo->ops->reset_queue(h, i);
+               if (ret)
+                       return ret;
+
                hns3_init_ring_hw(priv->ring_data[i].ring);
 
                /* We need to clear tx ring here because self test will
@@ -3701,18 +3742,30 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
        bool vlan_filter_enable;
        int ret;
 
-       hns3_init_mac_addr(netdev, false);
-       hns3_recover_hw_addr(netdev);
-       hns3_update_promisc_mode(netdev, handle->netdev_flags);
+       ret = hns3_init_mac_addr(netdev, false);
+       if (ret)
+               return ret;
+
+       ret = hns3_recover_hw_addr(netdev);
+       if (ret)
+               return ret;
+
+       ret = hns3_update_promisc_mode(netdev, handle->netdev_flags);
+       if (ret)
+               return ret;
+
        vlan_filter_enable = netdev->flags & IFF_PROMISC ? false : true;
        hns3_enable_vlan_filter(netdev, vlan_filter_enable);
 
-
        /* Hardware table is only clear when pf resets */
-       if (!(handle->flags & HNAE3_SUPPORT_VF))
-               hns3_restore_vlan(netdev);
+       if (!(handle->flags & HNAE3_SUPPORT_VF)) {
+               ret = hns3_restore_vlan(netdev);
+               return ret;
+       }
 
-       hns3_restore_fd_rules(netdev);
+       ret = hns3_restore_fd_rules(netdev);
+       if (ret)
+               return ret;
 
        /* Carrier off reporting is important to ethtool even BEFORE open */
        netif_carrier_off(netdev);
index 71cfca132d0bd044006f3a8e1a599973dbceb500..d3636d088aa3d960ae3bc2721d1257018286ae89 100644 (file)
@@ -640,7 +640,7 @@ void hns3_set_vector_coalesce_rl(struct hns3_enet_tqp_vector *tqp_vector,
                                 u32 rl_value);
 
 void hns3_enable_vlan_filter(struct net_device *netdev, bool enable);
-void hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags);
+int hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags);
 
 #ifdef CONFIG_HNS3_DCB
 void hns3_dcbnl_setup(struct hnae3_handle *handle);
index ac13cb2b168e5a6e67517837dd470e092a0db8f8..690f62ed87dcaa3b1997df510f0c06784e7c1eb6 100644 (file)
@@ -24,15 +24,15 @@ static int hclge_ring_space(struct hclge_cmq_ring *ring)
        return ring->desc_num - used - 1;
 }
 
-static int is_valid_csq_clean_head(struct hclge_cmq_ring *ring, int h)
+static int is_valid_csq_clean_head(struct hclge_cmq_ring *ring, int head)
 {
-       int u = ring->next_to_use;
-       int c = ring->next_to_clean;
+       int ntu = ring->next_to_use;
+       int ntc = ring->next_to_clean;
 
-       if (unlikely(h >= ring->desc_num))
-               return 0;
+       if (ntu > ntc)
+               return head >= ntc && head <= ntu;
 
-       return u > c ? (h > c && h <= u) : (h > c || h <= u);
+       return head >= ntc || head <= ntu;
 }
 
 static int hclge_alloc_cmd_desc(struct hclge_cmq_ring *ring)
@@ -304,6 +304,10 @@ int hclge_cmd_queue_init(struct hclge_dev *hdev)
 {
        int ret;
 
+       /* Setup the lock for command queue */
+       spin_lock_init(&hdev->hw.cmq.csq.lock);
+       spin_lock_init(&hdev->hw.cmq.crq.lock);
+
        /* Setup the queue entries for use cmd queue */
        hdev->hw.cmq.csq.desc_num = HCLGE_NIC_CMQ_DESC_NUM;
        hdev->hw.cmq.crq.desc_num = HCLGE_NIC_CMQ_DESC_NUM;
@@ -337,18 +341,20 @@ int hclge_cmd_init(struct hclge_dev *hdev)
        u32 version;
        int ret;
 
+       spin_lock_bh(&hdev->hw.cmq.csq.lock);
+       spin_lock_bh(&hdev->hw.cmq.crq.lock);
+
        hdev->hw.cmq.csq.next_to_clean = 0;
        hdev->hw.cmq.csq.next_to_use = 0;
        hdev->hw.cmq.crq.next_to_clean = 0;
        hdev->hw.cmq.crq.next_to_use = 0;
 
-       /* Setup the lock for command queue */
-       spin_lock_init(&hdev->hw.cmq.csq.lock);
-       spin_lock_init(&hdev->hw.cmq.crq.lock);
-
        hclge_cmd_init_regs(&hdev->hw);
        clear_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
 
+       spin_unlock_bh(&hdev->hw.cmq.crq.lock);
+       spin_unlock_bh(&hdev->hw.cmq.csq.lock);
+
        ret = hclge_cmd_query_firmware_version(&hdev->hw, &version);
        if (ret) {
                dev_err(&hdev->pdev->dev,
index dca6f2326c2672bf75f46613c2ed3389003090de..123c37e653f3eda4ad120970aa4cd19b094557f8 100644 (file)
@@ -751,7 +751,7 @@ static void hclge_process_ncsi_error(struct hclge_dev *hdev,
        ret = hclge_cmd_clear_error(hdev, &desc_wr, &desc_rd,
                                    HCLGE_NCSI_INT_CLR, 0);
        if (ret)
-               dev_err(dev, "failed(=%d) to clear NCSI intrerrupt status\n",
+               dev_err(dev, "failed(=%d) to clear NCSI interrupt status\n",
                        ret);
 }
 
index 5234b5373ed3b9257aad7ad3f480040694623721..ffdd96020860db0153d467814984d63efb01995e 100644 (file)
@@ -2236,7 +2236,7 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data)
        }
 
        /* clear the source of interrupt if it is not cause by reset */
-       if (event_cause != HCLGE_VECTOR0_EVENT_RST) {
+       if (event_cause == HCLGE_VECTOR0_EVENT_MBX) {
                hclge_clear_event_cause(hdev, event_cause, clearval);
                hclge_enable_vector(&hdev->misc_vector, true);
        }
@@ -2470,14 +2470,17 @@ static void hclge_reset(struct hclge_dev *hdev)
        handle = &hdev->vport[0].nic;
        rtnl_lock();
        hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
+       rtnl_unlock();
 
        if (!hclge_reset_wait(hdev)) {
+               rtnl_lock();
                hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT);
                hclge_reset_ae_dev(hdev->ae_dev);
                hclge_notify_client(hdev, HNAE3_INIT_CLIENT);
 
                hclge_clear_reset_cause(hdev);
        } else {
+               rtnl_lock();
                /* schedule again to check pending resets later */
                set_bit(hdev->reset_type, &hdev->reset_pending);
                hclge_reset_task_schedule(hdev);
@@ -3314,8 +3317,8 @@ void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc,
        param->vf_id = vport_id;
 }
 
-static void hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
-                                  bool en_mc_pmc)
+static int hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
+                                 bool en_mc_pmc)
 {
        struct hclge_vport *vport = hclge_get_vport(handle);
        struct hclge_dev *hdev = vport->back;
@@ -3323,7 +3326,7 @@ static void hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
 
        hclge_promisc_param_init(&param, en_uc_pmc, en_mc_pmc, true,
                                 vport->vport_id);
-       hclge_cmd_set_promisc_mode(hdev, &param);
+       return hclge_cmd_set_promisc_mode(hdev, &param);
 }
 
 static int hclge_get_fd_mode(struct hclge_dev *hdev, u8 *fd_mode)
@@ -6107,31 +6110,28 @@ static u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle,
        return tqp->index;
 }
 
-void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
+int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 {
        struct hclge_vport *vport = hclge_get_vport(handle);
        struct hclge_dev *hdev = vport->back;
        int reset_try_times = 0;
        int reset_status;
        u16 queue_gid;
-       int ret;
-
-       if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
-               return;
+       int ret = 0;
 
        queue_gid = hclge_covert_handle_qid_global(handle, queue_id);
 
        ret = hclge_tqp_enable(hdev, queue_id, 0, false);
        if (ret) {
-               dev_warn(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret);
-               return;
+               dev_err(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret);
+               return ret;
        }
 
        ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true);
        if (ret) {
-               dev_warn(&hdev->pdev->dev,
-                        "Send reset tqp cmd fail, ret = %d\n", ret);
-               return;
+               dev_err(&hdev->pdev->dev,
+                       "Send reset tqp cmd fail, ret = %d\n", ret);
+               return ret;
        }
 
        reset_try_times = 0;
@@ -6144,16 +6144,16 @@ void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
        }
 
        if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) {
-               dev_warn(&hdev->pdev->dev, "Reset TQP fail\n");
-               return ret;
+               dev_err(&hdev->pdev->dev, "Reset TQP fail\n");
+               return -ETIME;
        }
 
        ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false);
-       if (ret) {
-               dev_warn(&hdev->pdev->dev,
-                        "Deassert the soft reset fail, ret = %d\n", ret);
-               return;
-       }
+       if (ret)
+               dev_err(&hdev->pdev->dev,
+                       "Deassert the soft reset fail, ret = %d\n", ret);
+
+       return ret;
 }
 
 void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id)
index e3dfd654eca9a1c4b1f0af7a90b29d91c725ca1b..0d92154042699c94a41410ab6dc3aa38437c9b3a 100644 (file)
@@ -778,7 +778,7 @@ int hclge_rss_init_hw(struct hclge_dev *hdev);
 void hclge_rss_indir_init_cfg(struct hclge_dev *hdev);
 
 void hclge_mbx_handler(struct hclge_dev *hdev);
-void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id);
+int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id);
 void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id);
 int hclge_cfg_flowctrl(struct hclge_dev *hdev);
 int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id);
index 04462a347a94075bd28453b6db9088d77ef586c9..f890022938d9a15a96e98ab2c48cea04fc0dc784 100644 (file)
@@ -400,6 +400,12 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 
        /* handle all the mailbox requests in the queue */
        while (!hclge_cmd_crq_empty(&hdev->hw)) {
+               if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) {
+                       dev_warn(&hdev->pdev->dev,
+                                "command queue needs re-initializing\n");
+                       return;
+               }
+
                desc = &crq->desc[crq->next_to_use];
                req = (struct hclge_mbx_vf_to_pf_cmd *)desc->data;
 
index 24b1f2a0c32afc328cbdc82b67d5adb5fae916cc..03018638f701b3f2824bb153864e13cf929bd06c 100644 (file)
@@ -52,7 +52,7 @@ static int hclge_mdio_write(struct mii_bus *bus, int phyid, int regnum,
        struct hclge_desc desc;
        int ret;
 
-       if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+       if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state))
                return 0;
 
        hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, false);
@@ -90,7 +90,7 @@ static int hclge_mdio_read(struct mii_bus *bus, int phyid, int regnum)
        struct hclge_desc desc;
        int ret;
 
-       if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+       if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state))
                return 0;
 
        hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, true);
index aa5cb9834d73a807dd18661c10069b7c929cc6d6..494e562fe8c7e9f2322b9659b8e60ec3abce26f0 100644 (file)
@@ -1168,14 +1168,14 @@ static int hclge_pfc_setup_hw(struct hclge_dev *hdev)
  */
 static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc)
 {
-       struct hclge_vport *vport = hdev->vport;
-       u32 i, k, qs_bitmap;
-       int ret;
+       int i;
 
        for (i = 0; i < HCLGE_BP_GRP_NUM; i++) {
-               qs_bitmap = 0;
+               u32 qs_bitmap = 0;
+               int k, ret;
 
                for (k = 0; k < hdev->num_alloc_vport; k++) {
+                       struct hclge_vport *vport = &hdev->vport[k];
                        u16 qs_id = vport->qs_offset + tc;
                        u8 grp, sub_grp;
 
@@ -1185,8 +1185,6 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc)
                                                  HCLGE_BP_SUB_GRP_ID_S);
                        if (i == grp)
                                qs_bitmap |= (1 << sub_grp);
-
-                       vport++;
                }
 
                ret = hclge_tm_qs_bp_cfg(hdev, tc, i, qs_bitmap);
index e0a86a58342c28dab0a6e044895f9c4635f46207..085edb945389c5fc8aba0d94f8ffe2c74f070f24 100644 (file)
@@ -925,12 +925,12 @@ static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev,
        return status;
 }
 
-static void hclgevf_set_promisc_mode(struct hnae3_handle *handle,
-                                    bool en_uc_pmc, bool en_mc_pmc)
+static int hclgevf_set_promisc_mode(struct hnae3_handle *handle,
+                                   bool en_uc_pmc, bool en_mc_pmc)
 {
        struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 
-       hclgevf_cmd_set_promisc_mode(hdev, en_uc_pmc, en_mc_pmc);
+       return hclgevf_cmd_set_promisc_mode(hdev, en_uc_pmc, en_mc_pmc);
 }
 
 static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, int tqp_id,
@@ -1080,7 +1080,7 @@ static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
                                    1, false, NULL, 0);
 }
 
-static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
+static int hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 {
        struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
        u8 msg_data[2];
@@ -1091,10 +1091,10 @@ static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
        /* disable vf queue before send queue reset msg to PF */
        ret = hclgevf_tqp_enable(hdev, queue_id, 0, false);
        if (ret)
-               return;
+               return ret;
 
-       hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data,
-                            2, true, NULL, 0);
+       return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data,
+                                   2, true, NULL, 0);
 }
 
 static int hclgevf_notify_client(struct hclgevf_dev *hdev,
@@ -1170,6 +1170,8 @@ static int hclgevf_reset(struct hclgevf_dev *hdev)
        /* bring down the nic to stop any ongoing TX/RX */
        hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT);
 
+       rtnl_unlock();
+
        /* check if VF could successfully fetch the hardware reset completion
         * status from the hardware
         */
@@ -1181,12 +1183,15 @@ static int hclgevf_reset(struct hclgevf_dev *hdev)
                        ret);
 
                dev_warn(&hdev->pdev->dev, "VF reset failed, disabling VF!\n");
+               rtnl_lock();
                hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT);
 
                rtnl_unlock();
                return ret;
        }
 
+       rtnl_lock();
+
        /* now, re-initialize the nic client and ae device*/
        ret = hclgevf_reset_stack(hdev);
        if (ret)
index 967c993d5303ab87420c691abd1af11f4411560a..bbf9bdd0ee3e74246d558b4caa3cb3cfeb453e58 100644 (file)
@@ -532,7 +532,7 @@ void hinic_task_set_inner_l3(struct hinic_sq_task *task,
 }
 
 void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
-                             enum hinic_l4_offload_type l4_type,
+                             enum hinic_l4_tunnel_type l4_type,
                              u32 tunnel_len)
 {
        task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l4_type, TUNNEL_L4TYPE) |
index a0dc63a4bfc7ab9bd724fda8899650b37b4785e5..038522e202b6f616db097fbc4f0ef6d05755e682 100644 (file)
@@ -160,7 +160,7 @@ void hinic_task_set_inner_l3(struct hinic_sq_task *task,
                             u32 network_len);
 
 void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
-                             enum hinic_l4_offload_type l4_type,
+                             enum hinic_l4_tunnel_type l4_type,
                              u32 tunnel_len);
 
 void hinic_set_cs_inner_l4(struct hinic_sq_task *task,
index fd3373d82a9e94c7850631b88c6b4c7c8940f1a1..59e1bc0f609ee3399130eec23e330111b2dc64e6 100644 (file)
@@ -200,6 +200,15 @@ config IXGBE_DCB
 
          If unsure, say N.
 
+config IXGBE_IPSEC
+       bool "IPSec XFRM cryptography-offload acceleration"
+       depends on IXGBE
+       depends on XFRM_OFFLOAD
+       default y
+       select XFRM_ALGO
+       ---help---
+         Enable support for IPSec offload in ixgbe.ko
+
 config IXGBEVF
        tristate "Intel(R) 10GbE PCI Express Virtual Function Ethernet support"
        depends on PCI_MSI
@@ -217,6 +226,15 @@ config IXGBEVF
          will be called ixgbevf.  MSI-X interrupt support is required
          for this driver to work correctly.
 
+config IXGBEVF_IPSEC
+       bool "IPSec XFRM cryptography-offload acceleration"
+       depends on IXGBEVF
+       depends on XFRM_OFFLOAD
+       default y
+       select XFRM_ALGO
+       ---help---
+         Enable support for IPSec offload in ixgbevf.ko
+
 config I40E
        tristate "Intel(R) Ethernet Controller XL710 Family support"
        imply PTP_1588_CLOCK
index e707d717012faa997a127687ce45d54b27b9e3eb..5d4f1761dc0c2ef0613757ff5bfedb214f9ca79f 100644 (file)
@@ -244,7 +244,8 @@ process_mbx:
                }
 
                /* guarantee we have free space in the SM mailbox */
-               if (!hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU)) {
+               if (hw->mbx.state == FM10K_STATE_OPEN &&
+                   !hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU)) {
                        /* keep track of how many times this occurs */
                        interface->hw_sm_mbx_full++;
 
@@ -302,6 +303,28 @@ void fm10k_iov_suspend(struct pci_dev *pdev)
        }
 }
 
+static void fm10k_mask_aer_comp_abort(struct pci_dev *pdev)
+{
+       u32 err_mask;
+       int pos;
+
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
+       if (!pos)
+               return;
+
+       /* Mask the completion abort bit in the ERR_UNCOR_MASK register,
+        * preventing the device from reporting these errors to the upstream
+        * PCIe root device. This avoids bringing down platforms which upgrade
+        * non-fatal completer aborts into machine check exceptions. Completer
+        * aborts can occur whenever a VF reads a queue it doesn't own.
+        */
+       pci_read_config_dword(pdev, pos + PCI_ERR_UNCOR_MASK, &err_mask);
+       err_mask |= PCI_ERR_UNC_COMP_ABORT;
+       pci_write_config_dword(pdev, pos + PCI_ERR_UNCOR_MASK, err_mask);
+
+       mmiowb();
+}
+
 int fm10k_iov_resume(struct pci_dev *pdev)
 {
        struct fm10k_intfc *interface = pci_get_drvdata(pdev);
@@ -317,6 +340,12 @@ int fm10k_iov_resume(struct pci_dev *pdev)
        if (!iov_data)
                return -ENOMEM;
 
+       /* Lower severity of completer abort error reporting as
+        * the VFs can trigger this any time they read a queue
+        * that they don't own.
+        */
+       fm10k_mask_aer_comp_abort(pdev);
+
        /* allocate hardware resources for the VFs */
        hw->iov.ops.assign_resources(hw, num_vfs, num_vfs);
 
@@ -460,20 +489,6 @@ void fm10k_iov_disable(struct pci_dev *pdev)
        fm10k_iov_free_data(pdev);
 }
 
-static void fm10k_disable_aer_comp_abort(struct pci_dev *pdev)
-{
-       u32 err_sev;
-       int pos;
-
-       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
-       if (!pos)
-               return;
-
-       pci_read_config_dword(pdev, pos + PCI_ERR_UNCOR_SEVER, &err_sev);
-       err_sev &= ~PCI_ERR_UNC_COMP_ABORT;
-       pci_write_config_dword(pdev, pos + PCI_ERR_UNCOR_SEVER, err_sev);
-}
-
 int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs)
 {
        int current_vfs = pci_num_vf(pdev);
@@ -495,12 +510,6 @@ int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs)
 
        /* allocate VFs if not already allocated */
        if (num_vfs && num_vfs != current_vfs) {
-               /* Disable completer abort error reporting as
-                * the VFs can trigger this any time they read a queue
-                * that they don't own.
-                */
-               fm10k_disable_aer_comp_abort(pdev);
-
                err = pci_enable_sriov(pdev, num_vfs);
                if (err) {
                        dev_err(&pdev->dev,
index 503bbc0177922c45804bce8280452c7942663bf9..5b2a50e5798f755c4e360d5fec2d13c65f1ea2a1 100644 (file)
@@ -11,7 +11,7 @@
 
 #include "fm10k.h"
 
-#define DRV_VERSION    "0.23.4-k"
+#define DRV_VERSION    "0.26.1-k"
 #define DRV_SUMMARY    "Intel(R) Ethernet Switch Host Interface Driver"
 const char fm10k_driver_version[] = DRV_VERSION;
 char fm10k_driver_name[] = "fm10k";
index 02345d3813036cac695d675ae24b9a02aea8510c..e49fb51d36133ff49c6b11646f895b1b7dc8ee61 100644 (file)
@@ -23,6 +23,8 @@ static const struct fm10k_info *fm10k_info_tbl[] = {
  */
 static const struct pci_device_id fm10k_pci_tbl[] = {
        { PCI_VDEVICE(INTEL, FM10K_DEV_ID_PF), fm10k_device_pf },
+       { PCI_VDEVICE(INTEL, FM10K_DEV_ID_SDI_FM10420_QDA2), fm10k_device_pf },
+       { PCI_VDEVICE(INTEL, FM10K_DEV_ID_SDI_FM10420_DA2), fm10k_device_pf },
        { PCI_VDEVICE(INTEL, FM10K_DEV_ID_VF), fm10k_device_vf },
        /* required last entry */
        { 0, }
index 3e608e493f9df6bf921383f65be2139ad38cb288..9fb9fca375e3f69282c757fb3ae665a18982e8f3 100644 (file)
@@ -15,6 +15,8 @@ struct fm10k_hw;
 
 #define FM10K_DEV_ID_PF                        0x15A4
 #define FM10K_DEV_ID_VF                        0x15A5
+#define FM10K_DEV_ID_SDI_FM10420_QDA2  0x15D0
+#define FM10K_DEV_ID_SDI_FM10420_DA2   0x15D5
 
 #define FM10K_MAX_QUEUES               256
 #define FM10K_MAX_QUEUES_PF            128
index 81b0e1f8d14b6d041e4a8668b0f20955b9633e55..ac5698ed0b11194a30496b4bb4f11cafb26938c5 100644 (file)
@@ -3674,7 +3674,7 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
                dev_err(&pf->pdev->dev, "Invalid message from VF %d, opcode %d, len %d\n",
                        local_vf_id, v_opcode, msglen);
                switch (ret) {
-               case VIRTCHNL_ERR_PARAM:
+               case VIRTCHNL_STATUS_ERR_PARAM:
                        return -EPERM;
                default:
                        return -EINVAL;
index 9f4d700e09df33cb5d3e17576859a563f9c6c52c..29ced6b74d364632113e9674f8d005257e548411 100644 (file)
  *
  * The 40 bit 82580 SYSTIM overflows every
  *   2^40 * 10^-9 /  60  = 18.3 minutes.
+ *
+ * SYSTIM is converted to real time using a timecounter. As
+ * timecounter_cyc2time() allows old timestamps, the timecounter
+ * needs to be updated at least once per half of the SYSTIM interval.
+ * Scheduling of delayed work is not very accurate, so we aim for 8
+ * minutes to be sure the actual interval is shorter than 9.16 minutes.
  */
 
-#define IGB_SYSTIM_OVERFLOW_PERIOD     (HZ * 60 * 9)
+#define IGB_SYSTIM_OVERFLOW_PERIOD     (HZ * 60 * 8)
 #define IGB_PTP_TX_TIMEOUT             (HZ * 15)
 #define INCPERIOD_82576                        BIT(E1000_TIMINCA_16NS_SHIFT)
 #define INCVALUE_82576_MASK            GENMASK(E1000_TIMINCA_16NS_SHIFT - 1, 0)
index ca6b0c458e4a50eca96aea8ace64806cbf66aa45..4fb0d9e3f2da21db5acf17dbf5b9c091502db442 100644 (file)
@@ -17,4 +17,4 @@ ixgbe-$(CONFIG_IXGBE_DCB) +=  ixgbe_dcb.o ixgbe_dcb_82598.o \
 ixgbe-$(CONFIG_IXGBE_HWMON) += ixgbe_sysfs.o
 ixgbe-$(CONFIG_DEBUG_FS) += ixgbe_debugfs.o
 ixgbe-$(CONFIG_FCOE:m=y) += ixgbe_fcoe.o
-ixgbe-$(CONFIG_XFRM_OFFLOAD) += ixgbe_ipsec.o
+ixgbe-$(CONFIG_IXGBE_IPSEC) += ixgbe_ipsec.o
index ec1b87cc44100904bf7b486692bc1d06b256fc80..143bdd5ee2a088a738a5fb381e33c141d774e633 100644 (file)
@@ -769,9 +769,9 @@ struct ixgbe_adapter {
 #define IXGBE_RSS_KEY_SIZE     40  /* size of RSS Hash Key in bytes */
        u32 *rss_key;
 
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBE_IPSEC
        struct ixgbe_ipsec *ipsec;
-#endif /* CONFIG_XFRM_OFFLOAD */
+#endif /* CONFIG_IXGBE_IPSEC */
 
        /* AF_XDP zero-copy */
        struct xdp_umem **xsk_umems;
@@ -1008,7 +1008,7 @@ void ixgbe_store_key(struct ixgbe_adapter *adapter);
 void ixgbe_store_reta(struct ixgbe_adapter *adapter);
 s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
                       u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm);
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBE_IPSEC
 void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter);
 void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter);
 void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter);
@@ -1036,5 +1036,5 @@ static inline int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter,
                                        u32 *mbuf, u32 vf) { return -EACCES; }
 static inline int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter,
                                        u32 *mbuf, u32 vf) { return -EACCES; }
-#endif /* CONFIG_XFRM_OFFLOAD */
+#endif /* CONFIG_IXGBE_IPSEC */
 #endif /* _IXGBE_H_ */
index 0049a2becd7e7349db1cc6d7acf653489882277c..113b38e0defbf547920a5f2fadb63428e50e6dc1 100644 (file)
@@ -8694,7 +8694,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
 
 #endif /* IXGBE_FCOE */
 
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBE_IPSEC
        if (skb->sp && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
                goto out_drop;
 #endif
@@ -10190,7 +10190,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
         * the TSO, so it's the exception.
         */
        if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) {
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBE_IPSEC
                if (!skb->sp)
 #endif
                        features &= ~NETIF_F_TSO;
@@ -10883,7 +10883,7 @@ skip_sriov:
        if (hw->mac.type >= ixgbe_mac_82599EB)
                netdev->features |= NETIF_F_SCTP_CRC;
 
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBE_IPSEC
 #define IXGBE_ESP_FEATURES     (NETIF_F_HW_ESP | \
                                 NETIF_F_HW_ESP_TX_CSUM | \
                                 NETIF_F_GSO_ESP)
index af25a8fffeb8ba4f19a79f11f6d96f3d7f252047..5dacfc870259881f8746a72546f5c410f4bf06f6 100644 (file)
@@ -722,8 +722,10 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf)
                        ixgbe_set_vmvir(adapter, vfinfo->pf_vlan,
                                        adapter->default_up, vf);
 
-               if (vfinfo->spoofchk_enabled)
+               if (vfinfo->spoofchk_enabled) {
                        hw->mac.ops.set_vlan_anti_spoofing(hw, true, vf);
+                       hw->mac.ops.set_mac_anti_spoofing(hw, true, vf);
+               }
        }
 
        /* reset multicast table array for vf */
index 297d0f0858b59eba441397d0f6924558e7836768..186a4bb24fdea58a483f161858333313c34373ae 100644 (file)
@@ -10,5 +10,5 @@ ixgbevf-objs := vf.o \
                 mbx.o \
                 ethtool.o \
                 ixgbevf_main.o
-ixgbevf-$(CONFIG_XFRM_OFFLOAD) += ipsec.o
+ixgbevf-$(CONFIG_IXGBEVF_IPSEC) += ipsec.o
 
index e399e1c0c54ab178648632cbe187e50d7bd5f0f3..ecab686574b65d23dd7840d840b26c2c002338f9 100644 (file)
@@ -459,7 +459,7 @@ int ethtool_ioctl(struct ifreq *ifr);
 
 extern void ixgbevf_write_eitr(struct ixgbevf_q_vector *q_vector);
 
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBEVF_IPSEC
 void ixgbevf_init_ipsec_offload(struct ixgbevf_adapter *adapter);
 void ixgbevf_stop_ipsec_offload(struct ixgbevf_adapter *adapter);
 void ixgbevf_ipsec_restore(struct ixgbevf_adapter *adapter);
@@ -482,7 +482,7 @@ static inline int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring,
                                   struct ixgbevf_tx_buffer *first,
                                   struct ixgbevf_ipsec_tx_data *itd)
 { return 0; }
-#endif /* CONFIG_XFRM_OFFLOAD */
+#endif /* CONFIG_IXGBEVF_IPSEC */
 
 void ixgbe_napi_add_all(struct ixgbevf_adapter *adapter);
 void ixgbe_napi_del_all(struct ixgbevf_adapter *adapter);
index 98707ee11d72667a3ba83212ea301eeb27ba09d7..5e47ede7e832001a17575616fc0ab337d4d2f7fe 100644 (file)
@@ -4150,7 +4150,7 @@ static int ixgbevf_xmit_frame_ring(struct sk_buff *skb,
        first->tx_flags = tx_flags;
        first->protocol = vlan_get_protocol(skb);
 
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBEVF_IPSEC
        if (skb->sp && !ixgbevf_ipsec_tx(tx_ring, first, &ipsec_tx))
                goto out_drop;
 #endif
index 176c6b56fdccda6bc65f9c78f2b29fa88c6ed867..398328f107437e2f7c17e5fe54d0d8f36d83f058 100644 (file)
@@ -796,6 +796,7 @@ struct mvpp2_queue_vector {
        int nrxqs;
        u32 pending_cause_rx;
        struct mvpp2_port *port;
+       struct cpumask *mask;
 };
 
 struct mvpp2_port {
index 14f9679c957c6afd04929ea23ccd80afd19650ab..7a37a37e3fb347d90679db2ca16231597ba9afad 100644 (file)
@@ -3298,24 +3298,30 @@ static int mvpp2_irqs_init(struct mvpp2_port *port)
        for (i = 0; i < port->nqvecs; i++) {
                struct mvpp2_queue_vector *qv = port->qvecs + i;
 
-               if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE)
+               if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE) {
+                       qv->mask = kzalloc(cpumask_size(), GFP_KERNEL);
+                       if (!qv->mask) {
+                               err = -ENOMEM;
+                               goto err;
+                       }
+
                        irq_set_status_flags(qv->irq, IRQ_NO_BALANCING);
+               }
 
                err = request_irq(qv->irq, mvpp2_isr, 0, port->dev->name, qv);
                if (err)
                        goto err;
 
                if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE) {
-                       unsigned long mask = 0;
                        unsigned int cpu;
 
                        for_each_present_cpu(cpu) {
                                if (mvpp2_cpu_to_thread(port->priv, cpu) ==
                                    qv->sw_thread_id)
-                                       mask |= BIT(cpu);
+                                       cpumask_set_cpu(cpu, qv->mask);
                        }
 
-                       irq_set_affinity_hint(qv->irq, to_cpumask(&mask));
+                       irq_set_affinity_hint(qv->irq, qv->mask);
                }
        }
 
@@ -3325,6 +3331,8 @@ err:
                struct mvpp2_queue_vector *qv = port->qvecs + i;
 
                irq_set_affinity_hint(qv->irq, NULL);
+               kfree(qv->mask);
+               qv->mask = NULL;
                free_irq(qv->irq, qv);
        }
 
@@ -3339,6 +3347,8 @@ static void mvpp2_irqs_deinit(struct mvpp2_port *port)
                struct mvpp2_queue_vector *qv = port->qvecs + i;
 
                irq_set_affinity_hint(qv->irq, NULL);
+               kfree(qv->mask);
+               qv->mask = NULL;
                irq_clear_status_flags(qv->irq, IRQ_NO_BALANCING);
                free_irq(qv->irq, qv);
        }
index 5a6d0919533d6e0e619927abd753c5d07ed95dac..db00bf1c23f5ad31d64652ddc8bee32e2e7534c8 100644 (file)
@@ -43,6 +43,7 @@
 #include <linux/vmalloc.h>
 #include <linux/irq.h>
 
+#include <net/ip.h>
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ip6_checksum.h>
 #endif
index 1857ee0f0871d48285a6d3711f7c3e9a1e08a05f..6f5153afcab4dfc331c099da854c54f1b9500887 100644 (file)
@@ -1006,7 +1006,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
                ring->packets++;
        }
        ring->bytes += tx_info->nr_bytes;
-       netdev_tx_sent_queue(ring->tx_queue, tx_info->nr_bytes);
        AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len);
 
        if (tx_info->inl)
@@ -1044,7 +1043,10 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
                netif_tx_stop_queue(ring->tx_queue);
                ring->queue_stopped++;
        }
-       send_doorbell = !skb->xmit_more || netif_xmit_stopped(ring->tx_queue);
+
+       send_doorbell = __netdev_tx_sent_queue(ring->tx_queue,
+                                              tx_info->nr_bytes,
+                                              skb->xmit_more);
 
        real_size = (real_size / 16) & 0x3f;
 
index 94224c22ecc310a87b6715051e335446f29bec03..79638dcbae78395fb723c9bf3fa877e7a42d91cd 100644 (file)
@@ -713,43 +713,15 @@ static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
        rq->stats->ecn_mark += !!rc;
 }
 
-static __be32 mlx5e_get_fcs(struct sk_buff *skb)
+static u32 mlx5e_get_fcs(const struct sk_buff *skb)
 {
-       int last_frag_sz, bytes_in_prev, nr_frags;
-       u8 *fcs_p1, *fcs_p2;
-       skb_frag_t *last_frag;
-       __be32 fcs_bytes;
+       const void *fcs_bytes;
+       u32 _fcs_bytes;
 
-       if (!skb_is_nonlinear(skb))
-               return *(__be32 *)(skb->data + skb->len - ETH_FCS_LEN);
+       fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN,
+                                      ETH_FCS_LEN, &_fcs_bytes);
 
-       nr_frags = skb_shinfo(skb)->nr_frags;
-       last_frag = &skb_shinfo(skb)->frags[nr_frags - 1];
-       last_frag_sz = skb_frag_size(last_frag);
-
-       /* If all FCS data is in last frag */
-       if (last_frag_sz >= ETH_FCS_LEN)
-               return *(__be32 *)(skb_frag_address(last_frag) +
-                                  last_frag_sz - ETH_FCS_LEN);
-
-       fcs_p2 = (u8 *)skb_frag_address(last_frag);
-       bytes_in_prev = ETH_FCS_LEN - last_frag_sz;
-
-       /* Find where the other part of the FCS is - Linear or another frag */
-       if (nr_frags == 1) {
-               fcs_p1 = skb_tail_pointer(skb);
-       } else {
-               skb_frag_t *prev_frag = &skb_shinfo(skb)->frags[nr_frags - 2];
-
-               fcs_p1 = skb_frag_address(prev_frag) +
-                           skb_frag_size(prev_frag);
-       }
-       fcs_p1 -= bytes_in_prev;
-
-       memcpy(&fcs_bytes, fcs_p1, bytes_in_prev);
-       memcpy(((u8 *)&fcs_bytes) + bytes_in_prev, fcs_p2, last_frag_sz);
-
-       return fcs_bytes;
+       return __get_unaligned_cpu32(fcs_bytes);
 }
 
 static u8 get_ip_proto(struct sk_buff *skb, __be16 proto)
@@ -797,8 +769,9 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
                                                 network_depth - ETH_HLEN,
                                                 skb->csum);
                if (unlikely(netdev->features & NETIF_F_RXFCS))
-                       skb->csum = csum_add(skb->csum,
-                                            (__force __wsum)mlx5e_get_fcs(skb));
+                       skb->csum = csum_block_add(skb->csum,
+                                                  (__force __wsum)mlx5e_get_fcs(skb),
+                                                  skb->len - ETH_FCS_LEN);
                stats->csum_complete++;
                return;
        }
index 937d0ace699a7eeb4e04af3bf54eebde5dd5d459..30f751e696980d727a86200e1748adda13bb8a22 100644 (file)
@@ -943,8 +943,8 @@ static int mlxsw_devlink_core_bus_device_reload(struct devlink *devlink,
                                             mlxsw_core->bus,
                                             mlxsw_core->bus_priv, true,
                                             devlink);
-       if (err)
-               mlxsw_core->reload_fail = true;
+       mlxsw_core->reload_fail = !!err;
+
        return err;
 }
 
@@ -1083,8 +1083,15 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
 {
        struct devlink *devlink = priv_to_devlink(mlxsw_core);
 
-       if (mlxsw_core->reload_fail)
-               goto reload_fail;
+       if (mlxsw_core->reload_fail) {
+               if (!reload)
+                       /* Only the parts that were not de-initialized in the
+                        * failed reload attempt need to be de-initialized.
+                        */
+                       goto reload_fail_deinit;
+               else
+                       return;
+       }
 
        if (mlxsw_core->driver->fini)
                mlxsw_core->driver->fini(mlxsw_core);
@@ -1098,9 +1105,12 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
        if (!reload)
                devlink_resources_unregister(devlink, NULL);
        mlxsw_core->bus->fini(mlxsw_core->bus_priv);
-       if (reload)
-               return;
-reload_fail:
+
+       return;
+
+reload_fail_deinit:
+       devlink_unregister(devlink);
+       devlink_resources_unregister(devlink, NULL);
        devlink_free(devlink);
 }
 EXPORT_SYMBOL(mlxsw_core_bus_device_unregister);
index 32cb6718bb173ff966639a2ffc63374e457b586a..db3d2790aeecf9c3b93fe4c66df7856d637dfc70 100644 (file)
@@ -3284,7 +3284,7 @@ static inline void mlxsw_reg_qtct_pack(char *payload, u8 local_port,
  * Configures the ETS elements.
  */
 #define MLXSW_REG_QEEC_ID 0x400D
-#define MLXSW_REG_QEEC_LEN 0x1C
+#define MLXSW_REG_QEEC_LEN 0x20
 
 MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN);
 
@@ -3326,6 +3326,15 @@ MLXSW_ITEM32(reg, qeec, element_index, 0x04, 0, 8);
  */
 MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8);
 
+/* reg_qeec_mise
+ * Min shaper configuration enable. Enables configuration of the min
+ * shaper on this ETS element
+ * 0 - Disable
+ * 1 - Enable
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, mise, 0x0C, 31, 1);
+
 enum {
        MLXSW_REG_QEEC_BYTES_MODE,
        MLXSW_REG_QEEC_PACKETS_MODE,
@@ -3342,6 +3351,17 @@ enum {
  */
 MLXSW_ITEM32(reg, qeec, pb, 0x0C, 28, 1);
 
+/* The smallest permitted min shaper rate. */
+#define MLXSW_REG_QEEC_MIS_MIN 200000          /* Kbps */
+
+/* reg_qeec_min_shaper_rate
+ * Min shaper information rate.
+ * For CPU port, can only be configured for port hierarchy.
+ * When in bytes mode, value is specified in units of 1000bps.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, min_shaper_rate, 0x0C, 0, 28);
+
 /* reg_qeec_mase
  * Max shaper configuration enable. Enables configuration of the max
  * shaper on this ETS element.
index 8a4983adae940a08b4d4d5ec39637522fb1bea46..9bec940330a450856d2dba23ed7274321cf82059 100644 (file)
@@ -2740,6 +2740,21 @@ int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
 }
 
+static int mlxsw_sp_port_min_bw_set(struct mlxsw_sp_port *mlxsw_sp_port,
+                                   enum mlxsw_reg_qeec_hr hr, u8 index,
+                                   u8 next_index, u32 minrate)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char qeec_pl[MLXSW_REG_QEEC_LEN];
+
+       mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index,
+                           next_index);
+       mlxsw_reg_qeec_mise_set(qeec_pl, true);
+       mlxsw_reg_qeec_min_shaper_rate_set(qeec_pl, minrate);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
+}
+
 int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port,
                              u8 switch_prio, u8 tclass)
 {
@@ -2817,6 +2832,16 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
                        return err;
        }
 
+       /* Configure the min shaper for multicast TCs. */
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+               err = mlxsw_sp_port_min_bw_set(mlxsw_sp_port,
+                                              MLXSW_REG_QEEC_HIERARCY_TC,
+                                              i + 8, i,
+                                              MLXSW_REG_QEEC_MIS_MIN);
+               if (err)
+                       return err;
+       }
+
        /* Map all priorities to traffic class 0. */
        for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
                err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, 0);
@@ -3543,7 +3568,6 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
                        burst_size = 7;
                        break;
                case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
-                       is_bytes = true;
                        rate = 4 * 1024;
                        burst_size = 4;
                        break;
index bc60d7a8b49d764b4066c50bc808963c52c27950..739a51f0a366f421074a5ac31a6ec887898fb76a 100644 (file)
@@ -2661,8 +2661,6 @@ static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work)
                break;
        case SWITCHDEV_FDB_DEL_TO_DEVICE:
                fdb_info = &switchdev_work->fdb_info;
-               if (!fdb_info->added_by_user)
-                       break;
                mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, false);
                break;
        case SWITCHDEV_FDB_ADD_TO_BRIDGE: /* fall through */
index f40f654398a0782457240fa74bf81e0c65d7bf32..a96364df43203dbfe9b326a385a50b53dd1900c9 100644 (file)
@@ -1944,9 +1944,12 @@ int qed_mcp_trans_speed_mask(struct qed_hwfn *p_hwfn,
                             struct qed_ptt *p_ptt, u32 *p_speed_mask)
 {
        u32 transceiver_type, transceiver_state;
+       int ret;
 
-       qed_mcp_get_transceiver_data(p_hwfn, p_ptt, &transceiver_state,
-                                    &transceiver_type);
+       ret = qed_mcp_get_transceiver_data(p_hwfn, p_ptt, &transceiver_state,
+                                          &transceiver_type);
+       if (ret)
+               return ret;
 
        if (qed_is_transceiver_ready(transceiver_state, transceiver_type) ==
                                     false)
index b72ef171477e0ec6dfba85c09815226a584ec30b..bdd351597b55251b35e17daf43aeff643912e33b 100644 (file)
@@ -243,7 +243,7 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg,
  */
 int stmmac_mdio_reset(struct mii_bus *bus)
 {
-#if defined(CONFIG_STMMAC_PLATFORM)
+#if IS_ENABLED(CONFIG_STMMAC_PLATFORM)
        struct net_device *ndev = bus->priv;
        struct stmmac_priv *priv = netdev_priv(ndev);
        unsigned int mii_address = priv->hw->mii.addr;
index b12023bc2cab5feb15ceedbe2fc357dfcf37627e..a5bab614ff8459788493297bdcaa897106f7f1ba 100644 (file)
@@ -71,7 +71,6 @@ static unsigned int tx_start = 10;
 static unsigned int tx_stop = 5;
 
 struct ntb_netdev {
-       struct list_head list;
        struct pci_dev *pdev;
        struct net_device *ndev;
        struct ntb_transport_qp *qp;
@@ -81,8 +80,6 @@ struct ntb_netdev {
 #define        NTB_TX_TIMEOUT_MS       1000
 #define        NTB_RXQ_SIZE            100
 
-static LIST_HEAD(dev_list);
-
 static void ntb_netdev_event_handler(void *data, int link_is_up)
 {
        struct net_device *ndev = data;
@@ -236,7 +233,7 @@ static void ntb_netdev_tx_timer(struct timer_list *t)
        struct net_device *ndev = dev->ndev;
 
        if (ntb_transport_tx_free_entry(dev->qp) < tx_stop) {
-               mod_timer(&dev->tx_timer, jiffies + msecs_to_jiffies(tx_time));
+               mod_timer(&dev->tx_timer, jiffies + usecs_to_jiffies(tx_time));
        } else {
                /* Make sure anybody stopping the queue after this sees the new
                 * value of ntb_transport_tx_free_entry()
@@ -452,7 +449,7 @@ static int ntb_netdev_probe(struct device *client_dev)
        if (rc)
                goto err1;
 
-       list_add(&dev->list, &dev_list);
+       dev_set_drvdata(client_dev, ndev);
        dev_info(&pdev->dev, "%s created\n", ndev->name);
        return 0;
 
@@ -465,27 +462,8 @@ err:
 
 static void ntb_netdev_remove(struct device *client_dev)
 {
-       struct ntb_dev *ntb;
-       struct net_device *ndev;
-       struct pci_dev *pdev;
-       struct ntb_netdev *dev;
-       bool found = false;
-
-       ntb = dev_ntb(client_dev->parent);
-       pdev = ntb->pdev;
-
-       list_for_each_entry(dev, &dev_list, list) {
-               if (dev->pdev == pdev) {
-                       found = true;
-                       break;
-               }
-       }
-       if (!found)
-               return;
-
-       list_del(&dev->list);
-
-       ndev = dev->ndev;
+       struct net_device *ndev = dev_get_drvdata(client_dev);
+       struct ntb_netdev *dev = netdev_priv(ndev);
 
        unregister_netdev(ndev);
        ntb_transport_free_queue(dev->qp);
index 7fc8508b5231d94beab4c45bf7666d15d4ef786f..271e8adc39f1005dcc48b678ef528d442f12b9f8 100644 (file)
@@ -220,7 +220,7 @@ static struct phy_driver realtek_drvs[] = {
                .flags          = PHY_HAS_INTERRUPT,
        }, {
                .phy_id         = 0x001cc816,
-               .name           = "RTL8201F 10/100Mbps Ethernet",
+               .name           = "RTL8201F Fast Ethernet",
                .phy_id_mask    = 0x001fffff,
                .features       = PHY_BASIC_FEATURES,
                .flags          = PHY_HAS_INTERRUPT,
index 262e7a3c23cb67fbfd66b81ed0d26af0f0480d84..2d17f3b9bb16568c19275864b325e700c4ff5e88 100644 (file)
@@ -1598,6 +1598,8 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
                return ret;
        }
 
+       cancel_delayed_work_sync(&pdata->carrier_check);
+
        if (pdata->suspend_flags) {
                netdev_warn(dev->net, "error during last resume\n");
                pdata->suspend_flags = 0;
@@ -1840,6 +1842,11 @@ done:
         */
        if (ret && PMSG_IS_AUTO(message))
                usbnet_resume(intf);
+
+       if (ret)
+               schedule_delayed_work(&pdata->carrier_check,
+                                     CARRIER_CHECK_DELAY);
+
        return ret;
 }
 
index b360e5613b9f102e74bd2a3e0bd2548d949ed951..f8948cf515ce3936bb7dc859b555a3a28b59f08b 100644 (file)
@@ -1,6 +1,7 @@
 config NTB_IDT
        tristate "IDT PCIe-switch Non-Transparent Bridge support"
        depends on PCI
+       select HWMON
        help
         This driver supports NTB of cappable IDT PCIe-switches.
 
@@ -23,9 +24,7 @@ config NTB_IDT
         BAR settings of peer NT-functions, the BAR setups can't be done over
         kernel PCI fixups. That's why the alternative pre-initialization
         techniques like BIOS using SMBus interface or EEPROM should be
-        utilized. Additionally if one needs to have temperature sensor
-        information printed to system log, the corresponding registers must
-        be initialized within BIOS/EEPROM as well.
+        utilized.
 
         If unsure, say N.
 
index dbe72f116017ab305a0a1f19276155700f69211e..1dede87dd54fadd2a0c6a5336f708e2ea968b5d0 100644 (file)
@@ -4,7 +4,7 @@
  *
  *   GPL LICENSE SUMMARY
  *
- *   Copyright (C) 2016 T-Platforms All Rights Reserved.
+ *   Copyright (C) 2016-2018 T-Platforms JSC All Rights Reserved.
  *
  *   This program is free software; you can redistribute it and/or modify it
  *   under the terms and conditions of the GNU General Public License,
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include <linux/pci.h>
 #include <linux/aer.h>
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/debugfs.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
 #include <linux/ntb.h>
 
 #include "ntb_hw_idt.h"
@@ -1105,9 +1108,9 @@ static struct idt_mw_cfg *idt_scan_mws(struct idt_ntb_dev *ndev, int port,
        }
 
        /* Allocate memory for memory window descriptors */
-       ret_mws = devm_kcalloc(&ndev->ntb.pdev->dev, *mw_cnt,
-                               sizeof(*ret_mws), GFP_KERNEL);
-       if (IS_ERR_OR_NULL(ret_mws))
+       ret_mws = devm_kcalloc(&ndev->ntb.pdev->dev, *mw_cnt, sizeof(*ret_mws),
+                              GFP_KERNEL);
+       if (!ret_mws)
                return ERR_PTR(-ENOMEM);
 
        /* Copy the info of detected memory windows */
@@ -1320,7 +1323,7 @@ static int idt_ntb_peer_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx,
                idt_nt_write(ndev, bar->ltbase, (u32)addr);
                idt_nt_write(ndev, bar->utbase, (u32)(addr >> 32));
                /* Set the custom BAR aperture limit */
-               limit = pci_resource_start(ntb->pdev, mw_cfg->bar) + size;
+               limit = pci_bus_address(ntb->pdev, mw_cfg->bar) + size;
                idt_nt_write(ndev, bar->limit, (u32)limit);
                if (IS_FLD_SET(BARSETUP_TYPE, data, 64))
                        idt_nt_write(ndev, (bar + 1)->limit, (limit >> 32));
@@ -1821,61 +1824,284 @@ static int idt_ntb_peer_msg_write(struct ntb_dev *ntb, int pidx, int midx,
  *                      7. Temperature sensor operations
  *
  *    IDT PCIe-switch has an embedded temperature sensor, which can be used to
- * warn a user-space of possible chip overheating. Since workload temperature
- * can be different on different platforms, temperature thresholds as well as
- * general sensor settings must be setup in the framework of BIOS/EEPROM
- * initializations. It includes the actual sensor enabling as well.
+ * check current chip core temperature. Since a workload environment can be
+ * different on different platforms, an offset and ADC/filter settings can be
+ * specified. Although the offset configuration is only exposed to the sysfs
+ * hwmon interface at the moment. The rest of the settings can be adjusted
+ * for instance by the BIOS/EEPROM firmware.
  *=============================================================================
  */
 
+/*
+ * idt_get_deg() - convert millidegree Celsius value to just degree
+ * @mdegC:     IN - millidegree Celsius value
+ *
+ * Return: Degree corresponding to the passed millidegree value
+ */
+static inline s8 idt_get_deg(long mdegC)
+{
+       return mdegC / 1000;
+}
+
+/*
+ * idt_get_frac() - retrieve 0/0.5 fraction of the millidegree Celsius value
+ * @mdegC:     IN - millidegree Celsius value
+ *
+ * Return: 0/0.5 degree fraction of the passed millidegree value
+ */
+static inline u8 idt_get_deg_frac(long mdegC)
+{
+       return (mdegC % 1000) >= 500 ? 5 : 0;
+}
+
+/*
+ * idt_get_temp_fmt() - convert millidegree Celsius value to 0:7:1 format
+ * @mdegC:     IN - millidegree Celsius value
+ *
+ * Return: 0:7:1 format acceptable by the IDT temperature sensor
+ */
+static inline u8 idt_temp_get_fmt(long mdegC)
+{
+       return (idt_get_deg(mdegC) << 1) | (idt_get_deg_frac(mdegC) ? 1 : 0);
+}
+
+/*
+ * idt_get_temp_sval() - convert temp sample to signed millidegree Celsius
+ * @data:      IN - shifted to LSB 8-bits temperature sample
+ *
+ * Return: signed millidegree Celsius
+ */
+static inline long idt_get_temp_sval(u32 data)
+{
+       return ((s8)data / 2) * 1000 + (data & 0x1 ? 500 : 0);
+}
+
+/*
+ * idt_get_temp_sval() - convert temp sample to unsigned millidegree Celsius
+ * @data:      IN - shifted to LSB 8-bits temperature sample
+ *
+ * Return: unsigned millidegree Celsius
+ */
+static inline long idt_get_temp_uval(u32 data)
+{
+       return (data / 2) * 1000 + (data & 0x1 ? 500 : 0);
+}
+
 /*
  * idt_read_temp() - read temperature from chip sensor
  * @ntb:       NTB device context.
- * @val:       OUT - integer value of temperature
- * @frac:      OUT - fraction
+ * @type:      IN - type of the temperature value to read
+ * @val:       OUT - integer value of temperature in millidegree Celsius
  */
-static void idt_read_temp(struct idt_ntb_dev *ndev, unsigned char *val,
-                         unsigned char *frac)
+static void idt_read_temp(struct idt_ntb_dev *ndev,
+                         const enum idt_temp_val type, long *val)
 {
        u32 data;
 
-       /* Read the data from TEMP field of the TMPSTS register */
-       data = idt_sw_read(ndev, IDT_SW_TMPSTS);
-       data = GET_FIELD(TMPSTS_TEMP, data);
-       /* TEMP field has one fractional bit and seven integer bits */
-       *val = data >> 1;
-       *frac = ((data & 0x1) ? 5 : 0);
+       /* Alter the temperature field in accordance with the passed type */
+       switch (type) {
+       case IDT_TEMP_CUR:
+               data = GET_FIELD(TMPSTS_TEMP,
+                                idt_sw_read(ndev, IDT_SW_TMPSTS));
+               break;
+       case IDT_TEMP_LOW:
+               data = GET_FIELD(TMPSTS_LTEMP,
+                                idt_sw_read(ndev, IDT_SW_TMPSTS));
+               break;
+       case IDT_TEMP_HIGH:
+               data = GET_FIELD(TMPSTS_HTEMP,
+                                idt_sw_read(ndev, IDT_SW_TMPSTS));
+               break;
+       case IDT_TEMP_OFFSET:
+               /* This is the only field with signed 0:7:1 format */
+               data = GET_FIELD(TMPADJ_OFFSET,
+                                idt_sw_read(ndev, IDT_SW_TMPADJ));
+               *val = idt_get_temp_sval(data);
+               return;
+       default:
+               data = GET_FIELD(TMPSTS_TEMP,
+                                idt_sw_read(ndev, IDT_SW_TMPSTS));
+               break;
+       }
+
+       /* The rest of the fields accept unsigned 0:7:1 format */
+       *val = idt_get_temp_uval(data);
 }
 
 /*
- * idt_temp_isr() - temperature sensor alarm events ISR
- * @ndev:      IDT NTB hardware driver descriptor
- * @ntint_sts: NT-function interrupt status
+ * idt_write_temp() - write temperature to the chip sensor register
+ * @ntb:       NTB device context.
+ * @type:      IN - type of the temperature value to change
+ * @val:       IN - integer value of temperature in millidegree Celsius
+ */
+static void idt_write_temp(struct idt_ntb_dev *ndev,
+                          const enum idt_temp_val type, const long val)
+{
+       unsigned int reg;
+       u32 data;
+       u8 fmt;
+
+       /* Retrieve the properly formatted temperature value */
+       fmt = idt_temp_get_fmt(val);
+
+       mutex_lock(&ndev->hwmon_mtx);
+       switch (type) {
+       case IDT_TEMP_LOW:
+               reg = IDT_SW_TMPALARM;
+               data = SET_FIELD(TMPALARM_LTEMP, idt_sw_read(ndev, reg), fmt) &
+                       ~IDT_TMPALARM_IRQ_MASK;
+               break;
+       case IDT_TEMP_HIGH:
+               reg = IDT_SW_TMPALARM;
+               data = SET_FIELD(TMPALARM_HTEMP, idt_sw_read(ndev, reg), fmt) &
+                       ~IDT_TMPALARM_IRQ_MASK;
+               break;
+       case IDT_TEMP_OFFSET:
+               reg = IDT_SW_TMPADJ;
+               data = SET_FIELD(TMPADJ_OFFSET, idt_sw_read(ndev, reg), fmt);
+               break;
+       default:
+               goto inval_spin_unlock;
+       }
+
+       idt_sw_write(ndev, reg, data);
+
+inval_spin_unlock:
+       mutex_unlock(&ndev->hwmon_mtx);
+}
+
+/*
+ * idt_sysfs_show_temp() - printout corresponding temperature value
+ * @dev:       Pointer to the NTB device structure
+ * @da:                Sensor device attribute structure
+ * @buf:       Buffer to print temperature out
  *
- * It handles events of temperature crossing alarm thresholds. Since reading
- * of TMPALARM register clears it up, the function doesn't analyze the
- * read value, instead the current temperature value just warningly printed to
- * log.
- * The method is called from PCIe ISR bottom-half routine.
+ * Return: Number of written symbols or negative error
  */
-static void idt_temp_isr(struct idt_ntb_dev *ndev, u32 ntint_sts)
+static ssize_t idt_sysfs_show_temp(struct device *dev,
+                                  struct device_attribute *da, char *buf)
 {
-       unsigned char val, frac;
+       struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
+       struct idt_ntb_dev *ndev = dev_get_drvdata(dev);
+       enum idt_temp_val type = attr->index;
+       long mdeg;
 
-       /* Read the current temperature value */
-       idt_read_temp(ndev, &val, &frac);
+       idt_read_temp(ndev, type, &mdeg);
+       return sprintf(buf, "%ld\n", mdeg);
+}
 
-       /* Read the temperature alarm to clean the alarm status out */
-       /*(void)idt_sw_read(ndev, IDT_SW_TMPALARM);*/
+/*
+ * idt_sysfs_set_temp() - set corresponding temperature value
+ * @dev:       Pointer to the NTB device structure
+ * @da:                Sensor device attribute structure
+ * @buf:       Buffer to print temperature out
+ * @count:     Size of the passed buffer
+ *
+ * Return: Number of written symbols or negative error
+ */
+static ssize_t idt_sysfs_set_temp(struct device *dev,
+                                 struct device_attribute *da, const char *buf,
+                                 size_t count)
+{
+       struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
+       struct idt_ntb_dev *ndev = dev_get_drvdata(dev);
+       enum idt_temp_val type = attr->index;
+       long mdeg;
+       int ret;
 
-       /* Clean the corresponding interrupt bit */
-       idt_nt_write(ndev, IDT_NT_NTINTSTS, IDT_NTINTSTS_TMPSENSOR);
+       ret = kstrtol(buf, 10, &mdeg);
+       if (ret)
+               return ret;
+
+       /* Clamp the passed value in accordance with the type */
+       if (type == IDT_TEMP_OFFSET)
+               mdeg = clamp_val(mdeg, IDT_TEMP_MIN_OFFSET,
+                                IDT_TEMP_MAX_OFFSET);
+       else
+               mdeg = clamp_val(mdeg, IDT_TEMP_MIN_MDEG, IDT_TEMP_MAX_MDEG);
+
+       idt_write_temp(ndev, type, mdeg);
+
+       return count;
+}
+
+/*
+ * idt_sysfs_reset_hist() - reset temperature history
+ * @dev:       Pointer to the NTB device structure
+ * @da:                Sensor device attribute structure
+ * @buf:       Buffer to print temperature out
+ * @count:     Size of the passed buffer
+ *
+ * Return: Number of written symbols or negative error
+ */
+static ssize_t idt_sysfs_reset_hist(struct device *dev,
+                                   struct device_attribute *da,
+                                   const char *buf, size_t count)
+{
+       struct idt_ntb_dev *ndev = dev_get_drvdata(dev);
+
+       /* Just set the maximal value to the lowest temperature field and
+        * minimal value to the highest temperature field
+        */
+       idt_write_temp(ndev, IDT_TEMP_LOW, IDT_TEMP_MAX_MDEG);
+       idt_write_temp(ndev, IDT_TEMP_HIGH, IDT_TEMP_MIN_MDEG);
 
-       dev_dbg(&ndev->ntb.pdev->dev,
-               "Temp sensor IRQ detected %#08x", ntint_sts);
+       return count;
+}
+
+/*
+ * Hwmon IDT sysfs attributes
+ */
+static SENSOR_DEVICE_ATTR(temp1_input, 0444, idt_sysfs_show_temp, NULL,
+                         IDT_TEMP_CUR);
+static SENSOR_DEVICE_ATTR(temp1_lowest, 0444, idt_sysfs_show_temp, NULL,
+                         IDT_TEMP_LOW);
+static SENSOR_DEVICE_ATTR(temp1_highest, 0444, idt_sysfs_show_temp, NULL,
+                         IDT_TEMP_HIGH);
+static SENSOR_DEVICE_ATTR(temp1_offset, 0644, idt_sysfs_show_temp,
+                         idt_sysfs_set_temp, IDT_TEMP_OFFSET);
+static DEVICE_ATTR(temp1_reset_history, 0200, NULL, idt_sysfs_reset_hist);
 
-       /* Print temperature value to log */
-       dev_warn(&ndev->ntb.pdev->dev, "Temperature %hhu.%hhu", val, frac);
+/*
+ * Hwmon IDT sysfs attributes group
+ */
+static struct attribute *idt_temp_attrs[] = {
+       &sensor_dev_attr_temp1_input.dev_attr.attr,
+       &sensor_dev_attr_temp1_lowest.dev_attr.attr,
+       &sensor_dev_attr_temp1_highest.dev_attr.attr,
+       &sensor_dev_attr_temp1_offset.dev_attr.attr,
+       &dev_attr_temp1_reset_history.attr,
+       NULL
+};
+ATTRIBUTE_GROUPS(idt_temp);
+
+/*
+ * idt_init_temp() - initialize temperature sensor interface
+ * @ndev:      IDT NTB hardware driver descriptor
+ *
+ * Simple sensor initializarion method is responsible for device switching
+ * on and resource management based hwmon interface registration. Note, that
+ * since the device is shared we won't disable it on remove, but leave it
+ * working until the system is powered off.
+ */
+static void idt_init_temp(struct idt_ntb_dev *ndev)
+{
+       struct device *hwmon;
+
+       /* Enable sensor if it hasn't been already */
+       idt_sw_write(ndev, IDT_SW_TMPCTL, 0x0);
+
+       /* Initialize hwmon interface fields */
+       mutex_init(&ndev->hwmon_mtx);
+
+       hwmon = devm_hwmon_device_register_with_groups(&ndev->ntb.pdev->dev,
+               ndev->swcfg->name, ndev, idt_temp_groups);
+       if (IS_ERR(hwmon)) {
+               dev_err(&ndev->ntb.pdev->dev, "Couldn't create hwmon device");
+               return;
+       }
+
+       dev_dbg(&ndev->ntb.pdev->dev, "Temperature HWmon interface registered");
 }
 
 /*=============================================================================
@@ -1931,7 +2157,7 @@ static int idt_init_isr(struct idt_ntb_dev *ndev)
                goto err_free_vectors;
        }
 
-       /* Unmask Message/Doorbell/SE/Temperature interrupts */
+       /* Unmask Message/Doorbell/SE interrupts */
        ntint_mask = idt_nt_read(ndev, IDT_NT_NTINTMSK) & ~IDT_NTINTMSK_ALL;
        idt_nt_write(ndev, IDT_NT_NTINTMSK, ntint_mask);
 
@@ -1946,7 +2172,6 @@ err_free_vectors:
        return ret;
 }
 
-
 /*
  * idt_deinit_ist() - deinitialize PCIe interrupt handler
  * @ndev:      IDT NTB hardware driver descriptor
@@ -2007,12 +2232,6 @@ static irqreturn_t idt_thread_isr(int irq, void *devid)
                handled = true;
        }
 
-       /* Handle temperature sensor interrupt */
-       if (ntint_sts & IDT_NTINTSTS_TMPSENSOR) {
-               idt_temp_isr(ndev, ntint_sts);
-               handled = true;
-       }
-
        dev_dbg(&ndev->ntb.pdev->dev, "IDT IRQs 0x%08x handled", ntint_sts);
 
        return handled ? IRQ_HANDLED : IRQ_NONE;
@@ -2123,9 +2342,9 @@ static ssize_t idt_dbgfs_info_read(struct file *filp, char __user *ubuf,
                                   size_t count, loff_t *offp)
 {
        struct idt_ntb_dev *ndev = filp->private_data;
-       unsigned char temp, frac, idx, pidx, cnt;
+       unsigned char idx, pidx, cnt;
+       unsigned long irqflags, mdeg;
        ssize_t ret = 0, off = 0;
-       unsigned long irqflags;
        enum ntb_speed speed;
        enum ntb_width width;
        char *strbuf;
@@ -2274,9 +2493,10 @@ static ssize_t idt_dbgfs_info_read(struct file *filp, char __user *ubuf,
        off += scnprintf(strbuf + off, size - off, "\n");
 
        /* Current temperature */
-       idt_read_temp(ndev, &temp, &frac);
+       idt_read_temp(ndev, IDT_TEMP_CUR, &mdeg);
        off += scnprintf(strbuf + off, size - off,
-               "Switch temperature\t\t- %hhu.%hhuC\n", temp, frac);
+               "Switch temperature\t\t- %hhd.%hhuC\n",
+               idt_get_deg(mdeg), idt_get_deg_frac(mdeg));
 
        /* Copy the buffer to the User Space */
        ret = simple_read_from_buffer(ubuf, count, offp, strbuf, off);
@@ -2390,7 +2610,7 @@ static struct idt_ntb_dev *idt_create_dev(struct pci_dev *pdev,
 
        /* Allocate memory for the IDT PCIe-device descriptor */
        ndev = devm_kzalloc(&pdev->dev, sizeof(*ndev), GFP_KERNEL);
-       if (IS_ERR_OR_NULL(ndev)) {
+       if (!ndev) {
                dev_err(&pdev->dev, "Memory allocation failed for descriptor");
                return ERR_PTR(-ENOMEM);
        }
@@ -2571,6 +2791,9 @@ static int idt_pci_probe(struct pci_dev *pdev,
        /* Initialize Messaging subsystem */
        idt_init_msg(ndev);
 
+       /* Initialize hwmon interface */
+       idt_init_temp(ndev);
+
        /* Initialize IDT interrupts handler */
        ret = idt_init_isr(ndev);
        if (ret != 0)
index 856fd182f6f4f08816bd8523c338823187caf43c..2f1aa121b0cf381d788eb968f45fd2069a3b99e0 100644 (file)
@@ -4,7 +4,7 @@
  *
  *   GPL LICENSE SUMMARY
  *
- *   Copyright (C) 2016 T-Platforms All Rights Reserved.
+ *   Copyright (C) 2016-2018 T-Platforms JSC All Rights Reserved.
  *
  *   This program is free software; you can redistribute it and/or modify it
  *   under the terms and conditions of the GNU General Public License,
@@ -47,9 +47,9 @@
 #include <linux/pci_ids.h>
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include <linux/ntb.h>
 
-
 /*
  * Macro is used to create the struct pci_device_id that matches
  * the supported IDT PCIe-switches
  * @IDT_NTINTMSK_DBELL:                Doorbell interrupt mask bit
  * @IDT_NTINTMSK_SEVENT:       Switch Event interrupt mask bit
  * @IDT_NTINTMSK_TMPSENSOR:    Temperature sensor interrupt mask bit
- * @IDT_NTINTMSK_ALL:          All the useful interrupts mask
+ * @IDT_NTINTMSK_ALL:          NTB-related interrupts mask
  */
 #define IDT_NTINTMSK_MSG               0x00000001U
 #define IDT_NTINTMSK_DBELL             0x00000002U
 #define IDT_NTINTMSK_SEVENT            0x00000008U
 #define IDT_NTINTMSK_TMPSENSOR         0x00000080U
 #define IDT_NTINTMSK_ALL \
-       (IDT_NTINTMSK_MSG | IDT_NTINTMSK_DBELL | \
-        IDT_NTINTMSK_SEVENT | IDT_NTINTMSK_TMPSENSOR)
+       (IDT_NTINTMSK_MSG | IDT_NTINTMSK_DBELL | IDT_NTINTMSK_SEVENT)
 
 /*
  * NTGSIGNAL register fields related constants
 #define IDT_SWPxMSGCTL_PART_MASK       0x00000070U
 #define IDT_SWPxMSGCTL_PART_FLD                4
 
+/*
+ * TMPCTL register fields related constants
+ * @IDT_TMPCTL_LTH_MASK:       Low temperature threshold field mask
+ * @IDT_TMPCTL_LTH_FLD:                Low temperature threshold field offset
+ * @IDT_TMPCTL_MTH_MASK:       Middle temperature threshold field mask
+ * @IDT_TMPCTL_MTH_FLD:                Middle temperature threshold field offset
+ * @IDT_TMPCTL_HTH_MASK:       High temperature threshold field mask
+ * @IDT_TMPCTL_HTH_FLD:                High temperature threshold field offset
+ * @IDT_TMPCTL_PDOWN:          Temperature sensor power down
+ */
+#define IDT_TMPCTL_LTH_MASK            0x000000FFU
+#define IDT_TMPCTL_LTH_FLD             0
+#define IDT_TMPCTL_MTH_MASK            0x0000FF00U
+#define IDT_TMPCTL_MTH_FLD             8
+#define IDT_TMPCTL_HTH_MASK            0x00FF0000U
+#define IDT_TMPCTL_HTH_FLD             16
+#define IDT_TMPCTL_PDOWN               0x80000000U
+
 /*
  * TMPSTS register fields related constants
  * @IDT_TMPSTS_TEMP_MASK:      Current temperature field mask
  * @IDT_TMPSTS_TEMP_FLD:       Current temperature field offset
+ * @IDT_TMPSTS_LTEMP_MASK:     Lowest temperature field mask
+ * @IDT_TMPSTS_LTEMP_FLD:      Lowest temperature field offset
+ * @IDT_TMPSTS_HTEMP_MASK:     Highest temperature field mask
+ * @IDT_TMPSTS_HTEMP_FLD:      Highest temperature field offset
  */
 #define IDT_TMPSTS_TEMP_MASK           0x000000FFU
 #define IDT_TMPSTS_TEMP_FLD            0
+#define IDT_TMPSTS_LTEMP_MASK          0x0000FF00U
+#define IDT_TMPSTS_LTEMP_FLD           8
+#define IDT_TMPSTS_HTEMP_MASK          0x00FF0000U
+#define IDT_TMPSTS_HTEMP_FLD           16
+
+/*
+ * TMPALARM register fields related constants
+ * @IDT_TMPALARM_LTEMP_MASK:   Lowest temperature field mask
+ * @IDT_TMPALARM_LTEMP_FLD:    Lowest temperature field offset
+ * @IDT_TMPALARM_HTEMP_MASK:   Highest temperature field mask
+ * @IDT_TMPALARM_HTEMP_FLD:    Highest temperature field offset
+ * @IDT_TMPALARM_IRQ_MASK:     Alarm IRQ status mask
+ */
+#define IDT_TMPALARM_LTEMP_MASK                0x0000FF00U
+#define IDT_TMPALARM_LTEMP_FLD         8
+#define IDT_TMPALARM_HTEMP_MASK                0x00FF0000U
+#define IDT_TMPALARM_HTEMP_FLD         16
+#define IDT_TMPALARM_IRQ_MASK          0x3F000000U
+
+/*
+ * TMPADJ register fields related constants
+ * @IDT_TMPADJ_OFFSET_MASK:    Temperature value offset field mask
+ * @IDT_TMPADJ_OFFSET_FLD:     Temperature value offset field offset
+ */
+#define IDT_TMPADJ_OFFSET_MASK         0x000000FFU
+#define IDT_TMPADJ_OFFSET_FLD          0
 
 /*
  * Helper macro to get/set the corresponding field value
 #define IDT_TRANS_ALIGN                4
 #define IDT_DIR_SIZE_ALIGN     1
 
+/*
+ * IDT PCIe-switch temperature sensor value limits
+ * @IDT_TEMP_MIN_MDEG: Minimal integer value of temperature
+ * @IDT_TEMP_MAX_MDEG: Maximal integer value of temperature
+ * @IDT_TEMP_MIN_OFFSET:Minimal integer value of temperature offset
+ * @IDT_TEMP_MAX_OFFSET:Maximal integer value of temperature offset
+ */
+#define IDT_TEMP_MIN_MDEG      0
+#define IDT_TEMP_MAX_MDEG      127500
+#define IDT_TEMP_MIN_OFFSET    -64000
+#define IDT_TEMP_MAX_OFFSET    63500
+
+/*
+ * Temperature sensor values enumeration
+ * @IDT_TEMP_CUR:      Current temperature
+ * @IDT_TEMP_LOW:      Lowest historical temperature
+ * @IDT_TEMP_HIGH:     Highest historical temperature
+ * @IDT_TEMP_OFFSET:   Current temperature offset
+ */
+enum idt_temp_val {
+       IDT_TEMP_CUR,
+       IDT_TEMP_LOW,
+       IDT_TEMP_HIGH,
+       IDT_TEMP_OFFSET
+};
+
 /*
  * IDT Memory Windows type. Depending on the device settings, IDT supports
  * Direct Address Translation MW registers and Lookup Table registers
@@ -1044,6 +1117,8 @@ struct idt_ntb_peer {
  * @msg_mask_lock:     Message mask register lock
  * @gasa_lock:         GASA registers access lock
  *
+ * @hwmon_mtx:         Temperature sensor interface update mutex
+ *
  * @dbgfs_info:                DebugFS info node
  */
 struct idt_ntb_dev {
@@ -1071,6 +1146,8 @@ struct idt_ntb_dev {
        spinlock_t msg_mask_lock;
        spinlock_t gasa_lock;
 
+       struct mutex hwmon_mtx;
+
        struct dentry *dbgfs_info;
 };
 #define to_ndev_ntb(__ntb) container_of(__ntb, struct idt_ntb_dev, ntb)
index 6aa57322727916bd5bc1c8e5ab13f286f8fd1b1d..2ad263f708da7ab68b12c9767058df9505501013 100644 (file)
@@ -265,7 +265,7 @@ static inline int ndev_db_clear_mask(struct intel_ntb_dev *ndev, u64 db_bits,
        return 0;
 }
 
-static inline int ndev_vec_mask(struct intel_ntb_dev *ndev, int db_vector)
+static inline u64 ndev_vec_mask(struct intel_ntb_dev *ndev, int db_vector)
 {
        u64 shift, mask;
 
index 9398959664769b5f6cd3c79e4914199d1cf35f0d..3bfdb4562408879fd04f340b05e84673c4da2232 100644 (file)
@@ -194,6 +194,8 @@ struct ntb_transport_mw {
        void __iomem *vbase;
        size_t xlat_size;
        size_t buff_size;
+       size_t alloc_size;
+       void *alloc_addr;
        void *virt_addr;
        dma_addr_t dma_addr;
 };
@@ -672,13 +674,59 @@ static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
                return;
 
        ntb_mw_clear_trans(nt->ndev, PIDX, num_mw);
-       dma_free_coherent(&pdev->dev, mw->buff_size,
-                         mw->virt_addr, mw->dma_addr);
+       dma_free_coherent(&pdev->dev, mw->alloc_size,
+                         mw->alloc_addr, mw->dma_addr);
        mw->xlat_size = 0;
        mw->buff_size = 0;
+       mw->alloc_size = 0;
+       mw->alloc_addr = NULL;
        mw->virt_addr = NULL;
 }
 
+static int ntb_alloc_mw_buffer(struct ntb_transport_mw *mw,
+                              struct device *dma_dev, size_t align)
+{
+       dma_addr_t dma_addr;
+       void *alloc_addr, *virt_addr;
+       int rc;
+
+       alloc_addr = dma_alloc_coherent(dma_dev, mw->alloc_size,
+                                       &dma_addr, GFP_KERNEL);
+       if (!alloc_addr) {
+               dev_err(dma_dev, "Unable to alloc MW buff of size %zu\n",
+                       mw->alloc_size);
+               return -ENOMEM;
+       }
+       virt_addr = alloc_addr;
+
+       /*
+        * we must ensure that the memory address allocated is BAR size
+        * aligned in order for the XLAT register to take the value. This
+        * is a requirement of the hardware. It is recommended to setup CMA
+        * for BAR sizes equal or greater than 4MB.
+        */
+       if (!IS_ALIGNED(dma_addr, align)) {
+               if (mw->alloc_size > mw->buff_size) {
+                       virt_addr = PTR_ALIGN(alloc_addr, align);
+                       dma_addr = ALIGN(dma_addr, align);
+               } else {
+                       rc = -ENOMEM;
+                       goto err;
+               }
+       }
+
+       mw->alloc_addr = alloc_addr;
+       mw->virt_addr = virt_addr;
+       mw->dma_addr = dma_addr;
+
+       return 0;
+
+err:
+       dma_free_coherent(dma_dev, mw->alloc_size, alloc_addr, dma_addr);
+
+       return rc;
+}
+
 static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
                      resource_size_t size)
 {
@@ -710,28 +758,20 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
        /* Alloc memory for receiving data.  Must be aligned */
        mw->xlat_size = xlat_size;
        mw->buff_size = buff_size;
+       mw->alloc_size = buff_size;
 
-       mw->virt_addr = dma_alloc_coherent(&pdev->dev, buff_size,
-                                          &mw->dma_addr, GFP_KERNEL);
-       if (!mw->virt_addr) {
-               mw->xlat_size = 0;
-               mw->buff_size = 0;
-               dev_err(&pdev->dev, "Unable to alloc MW buff of size %zu\n",
-                       buff_size);
-               return -ENOMEM;
-       }
-
-       /*
-        * we must ensure that the memory address allocated is BAR size
-        * aligned in order for the XLAT register to take the value. This
-        * is a requirement of the hardware. It is recommended to setup CMA
-        * for BAR sizes equal or greater than 4MB.
-        */
-       if (!IS_ALIGNED(mw->dma_addr, xlat_align)) {
-               dev_err(&pdev->dev, "DMA memory %pad is not aligned\n",
-                       &mw->dma_addr);
-               ntb_free_mw(nt, num_mw);
-               return -ENOMEM;
+       rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align);
+       if (rc) {
+               mw->alloc_size *= 2;
+               rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align);
+               if (rc) {
+                       dev_err(&pdev->dev,
+                               "Unable to alloc aligned MW buff\n");
+                       mw->xlat_size = 0;
+                       mw->buff_size = 0;
+                       mw->alloc_size = 0;
+                       return rc;
+               }
        }
 
        /* Notify HW the memory location of the receive buffer */
@@ -1278,6 +1318,7 @@ static void ntb_rx_copy_callback(void *data,
                case DMA_TRANS_READ_FAILED:
                case DMA_TRANS_WRITE_FAILED:
                        entry->errors++;
+                       /* fall through */
                case DMA_TRANS_ABORTED:
                {
                        struct ntb_transport_qp *qp = entry->qp;
@@ -1533,6 +1574,7 @@ static void ntb_tx_copy_callback(void *data,
                case DMA_TRANS_READ_FAILED:
                case DMA_TRANS_WRITE_FAILED:
                        entry->errors++;
+                       /* fall through */
                case DMA_TRANS_ABORTED:
                {
                        void __iomem *offset =
index 2e65be8b1387af92a2b099478c36e5a28e6d7ffc..559d567693b8d060b920952e8429d219471b9569 100644 (file)
@@ -1519,8 +1519,10 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
        if (ns->ndev)
                nvme_nvm_update_nvm_info(ns);
 #ifdef CONFIG_NVME_MULTIPATH
-       if (ns->head->disk)
+       if (ns->head->disk) {
                nvme_update_disk_info(ns->head->disk, ns, id);
+               blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
+       }
 #endif
 }
 
index e52b9d3c0bd6cc22e1ac6b97736426267bb761f6..0b70c8bab045ac1a544cf3c72eb1a8766152fcd2 100644 (file)
@@ -1704,7 +1704,6 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
        op->fcp_req.rspaddr = &op->rsp_iu;
        op->fcp_req.rsplen = sizeof(op->rsp_iu);
        op->fcp_req.done = nvme_fc_fcpio_done;
-       op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE];
        op->ctrl = ctrl;
        op->queue = queue;
        op->rq = rq;
@@ -1752,6 +1751,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
        if (res)
                return res;
        op->op.fcp_req.first_sgl = &op->sgl[0];
+       op->op.fcp_req.private = &op->priv[0];
        return res;
 }
 
index 5e3cc8c59a394fce6ba25f1c621b26903185963d..9901afd804ce3720709c198fb54140d2a2ea3d85 100644 (file)
@@ -285,6 +285,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
        blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
        /* set to a default value for 512 until disk is validated */
        blk_queue_logical_block_size(q, 512);
+       blk_set_stacking_limits(&q->limits);
 
        /* we need to propagate up the VMC settings */
        if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
index f30031945ee4b671033a0d80583d05c0724a096e..c33bb201b8846fae0319e1c575f9737af79a1944 100644 (file)
@@ -1663,6 +1663,9 @@ static void nvme_map_cmb(struct nvme_dev *dev)
        struct pci_dev *pdev = to_pci_dev(dev->dev);
        int bar;
 
+       if (dev->cmb_size)
+               return;
+
        dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
        if (!dev->cmbsz)
                return;
@@ -2147,7 +2150,6 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 {
        struct pci_dev *pdev = to_pci_dev(dev->dev);
 
-       nvme_release_cmb(dev);
        pci_free_irq_vectors(pdev);
 
        if (pci_is_enabled(pdev)) {
@@ -2595,6 +2597,7 @@ static void nvme_remove(struct pci_dev *pdev)
        nvme_stop_ctrl(&dev->ctrl);
        nvme_remove_namespaces(&dev->ctrl);
        nvme_dev_disable(dev, true);
+       nvme_release_cmb(dev);
        nvme_free_host_mem(dev);
        nvme_dev_remove_admin(dev);
        nvme_free_queues(dev, 0);
index f4efe289dc7bc2caa8ce3c2a1a44b97e66cd0324..a5f9bbce863f42dcff6c23759801fa9c48d0b210 100644 (file)
@@ -420,7 +420,7 @@ static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
        struct pci_dev *p2p_dev;
        int ret;
 
-       if (!ctrl->p2p_client)
+       if (!ctrl->p2p_client || !ns->use_p2pmem)
                return;
 
        if (ns->p2p_dev) {
index 39d972e2595f0dc764f2a5ac37d589422068c139..01feebec29ea2d671d2d110a8a4df7148cc2f616 100644 (file)
@@ -101,7 +101,7 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
                rw = READ;
        }
 
-       iov_iter_bvec(&iter, ITER_BVEC | rw, req->f.bvec, nr_segs, count);
+       iov_iter_bvec(&iter, rw, req->f.bvec, nr_segs, count);
 
        iocb->ki_pos = pos;
        iocb->ki_filp = req->ns->file;
index ddce100be57a48f883558e147669a3d06f1046bb..3f7971d3706d90d5fbf382072ed8d2da2ac6e8b6 100644 (file)
@@ -122,7 +122,6 @@ struct nvmet_rdma_device {
        int                     inline_page_count;
 };
 
-static struct workqueue_struct *nvmet_rdma_delete_wq;
 static bool nvmet_rdma_use_srq;
 module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444);
 MODULE_PARM_DESC(use_srq, "Use shared receive queue.");
@@ -1274,12 +1273,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 
        if (queue->host_qid == 0) {
                /* Let inflight controller teardown complete */
-               flush_workqueue(nvmet_rdma_delete_wq);
+               flush_scheduled_work();
        }
 
        ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
        if (ret) {
-               queue_work(nvmet_rdma_delete_wq, &queue->release_work);
+               schedule_work(&queue->release_work);
                /* Destroying rdma_cm id is not needed here */
                return 0;
        }
@@ -1344,7 +1343,7 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
 
        if (disconnect) {
                rdma_disconnect(queue->cm_id);
-               queue_work(nvmet_rdma_delete_wq, &queue->release_work);
+               schedule_work(&queue->release_work);
        }
 }
 
@@ -1374,7 +1373,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
        mutex_unlock(&nvmet_rdma_queue_mutex);
 
        pr_err("failed to connect queue %d\n", queue->idx);
-       queue_work(nvmet_rdma_delete_wq, &queue->release_work);
+       schedule_work(&queue->release_work);
 }
 
 /**
@@ -1656,17 +1655,8 @@ static int __init nvmet_rdma_init(void)
        if (ret)
                goto err_ib_client;
 
-       nvmet_rdma_delete_wq = alloc_workqueue("nvmet-rdma-delete-wq",
-                       WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
-       if (!nvmet_rdma_delete_wq) {
-               ret = -ENOMEM;
-               goto err_unreg_transport;
-       }
-
        return 0;
 
-err_unreg_transport:
-       nvmet_unregister_transport(&nvmet_rdma_ops);
 err_ib_client:
        ib_unregister_client(&nvmet_rdma_ib_client);
        return ret;
@@ -1674,7 +1664,6 @@ err_ib_client:
 
 static void __exit nvmet_rdma_exit(void)
 {
-       destroy_workqueue(nvmet_rdma_delete_wq);
        nvmet_unregister_transport(&nvmet_rdma_ops);
        ib_unregister_client(&nvmet_rdma_ib_client);
        WARN_ON_ONCE(!list_empty(&nvmet_rdma_queue_list));
index d023cf303d56c3a557881a55818b9ee424e74162..09692c9b32a71c8f080653af4312b56ad55467a4 100644 (file)
@@ -777,8 +777,6 @@ struct device_node *of_get_next_cpu_node(struct device_node *prev)
                if (!(of_node_name_eq(next, "cpu") ||
                      (next->type && !of_node_cmp(next->type, "cpu"))))
                        continue;
-               if (!__of_device_is_available(next))
-                       continue;
                if (of_node_get(next))
                        break;
        }
index 0f27fad9fe940de645f61f868d0578795e073abc..5592437bb3d155aa415ee23511e404aa2afe9ba3 100644 (file)
@@ -149,9 +149,11 @@ int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma)
         * set by the driver.
         */
        mask = DMA_BIT_MASK(ilog2(dma_addr + size - 1) + 1);
-       dev->bus_dma_mask = mask;
        dev->coherent_dma_mask &= mask;
        *dev->dma_mask &= mask;
+       /* ...but only set bus mask if we found valid dma-ranges earlier */
+       if (!ret)
+               dev->bus_dma_mask = mask;
 
        coherent = of_dma_is_coherent(np);
        dev_dbg(dev, "device is%sdma coherent\n",
index 35c64a4295e07edc9b5f59ebaed18d3ab2ef1c52..fe6b13608e5101458254d4f522454df65dc2e8fa 100644 (file)
@@ -104,9 +104,14 @@ static int __init of_numa_parse_distance_map_v1(struct device_node *map)
                distance = of_read_number(matrix, 1);
                matrix++;
 
+               if ((nodea == nodeb && distance != LOCAL_DISTANCE) ||
+                   (nodea != nodeb && distance <= LOCAL_DISTANCE)) {
+                       pr_err("Invalid distance[node%d -> node%d] = %d\n",
+                              nodea, nodeb, distance);
+                       return -EINVAL;
+               }
+
                numa_set_distance(nodea, nodeb, distance);
-               pr_debug("distance[node%d -> node%d] = %d\n",
-                        nodea, nodeb, distance);
 
                /* Set default distance of node B->A same as A->B */
                if (nodeb > nodea)
index bdac939de223c9242385bfd77c23a343258fc687..54f6a40c75c69c667ac3d4591dde89fd99bac3be 100644 (file)
@@ -60,7 +60,10 @@ config ACERHDF
 
          After loading this driver the BIOS is still in control of the fan.
          To let the kernel handle the fan, do:
-         echo -n enabled > /sys/class/thermal/thermal_zone0/mode
+         echo -n enabled > /sys/class/thermal/thermal_zoneN/mode
+         where N=0,1,2... depending on the number of thermal nodes and the
+         detection order of your particular system.  The "type" parameter
+         in the same node directory will tell you if it is "acerhdf".
 
          For more information about this driver see
          <http://piie.net/files/acerhdf_README.txt>
@@ -105,6 +108,22 @@ config ASUS_LAPTOP
 
          If you have an ACPI-compatible ASUS laptop, say Y or M here.
 
+config DCDBAS
+       tristate "Dell Systems Management Base Driver"
+       depends on X86
+       help
+         The Dell Systems Management Base Driver provides a sysfs interface
+         for systems management software to perform System Management
+         Interrupts (SMIs) and Host Control Actions (system power cycle or
+         power off after OS shutdown) on certain Dell systems.
+
+         See <file:Documentation/dcdbas.txt> for more details on the driver
+         and the Dell systems on which Dell systems management software makes
+         use of this driver.
+
+         Say Y or M here to enable the driver for use by Dell systems
+         management software such as Dell OpenManage.
+
 #
 # The DELL_SMBIOS driver depends on ACPI_WMI and/or DCDBAS if those
 # backends are selected. The "depends" line prevents a configuration
@@ -227,6 +246,18 @@ config DELL_RBTN
          To compile this driver as a module, choose M here: the module will
          be called dell-rbtn.
 
+config DELL_RBU
+       tristate "BIOS update support for DELL systems via sysfs"
+       depends on X86
+       select FW_LOADER
+       select FW_LOADER_USER_HELPER
+       help
+        Say m if you want to have the option of updating the BIOS for your
+        DELL system. Note you need a Dell OpenManage or Dell Update package (DUP)
+        supporting application to communicate with the BIOS regarding the new
+        image for the image update to take effect.
+        See <file:Documentation/dell_rbu.txt> for more details on the driver.
+
 
 config FUJITSU_LAPTOP
        tristate "Fujitsu Laptop Extras"
@@ -336,6 +367,20 @@ config HP_WMI
         To compile this driver as a module, choose M here: the module will
         be called hp-wmi.
 
+config LG_LAPTOP
+       tristate "LG Laptop Extras"
+       depends on ACPI
+       depends on ACPI_WMI
+       depends on INPUT
+       select INPUT_SPARSEKMAP
+       select LEDS_CLASS
+       help
+        This driver adds support for hotkeys as well as control of keyboard
+        backlight, battery maximum charge level and various other ACPI
+        features.
+
+        If you have an LG Gram laptop, say Y or M here.
+
 config MSI_LAPTOP
        tristate "MSI Laptop Extras"
        depends on ACPI
@@ -1231,6 +1276,18 @@ config I2C_MULTI_INSTANTIATE
          To compile this driver as a module, choose M here: the module
          will be called i2c-multi-instantiate.
 
+config INTEL_ATOMISP2_PM
+       tristate "Intel AtomISP2 dummy / power-management driver"
+       depends on PCI && IOSF_MBI && PM
+       help
+         Power-management driver for Intel's Image Signal Processor found on
+         Bay and Cherry Trail devices. This dummy driver's sole purpose is to
+         turn the ISP off (put it in D3) to save power and to allow entering
+         of S0ix modes.
+
+         To compile this driver as a module, choose M here: the module
+         will be called intel_atomisp2_pm.
+
 endif # X86_PLATFORM_DEVICES
 
 config PMC_ATOM
index e6d1becf81ce8c6bd526e196b8c167576d0f5c6f..39ae94135406b69bb7cef9bcb8d24242382afe35 100644 (file)
@@ -9,9 +9,11 @@ obj-$(CONFIG_ASUS_NB_WMI)      += asus-nb-wmi.o
 obj-$(CONFIG_ASUS_WIRELESS)    += asus-wireless.o
 obj-$(CONFIG_EEEPC_LAPTOP)     += eeepc-laptop.o
 obj-$(CONFIG_EEEPC_WMI)                += eeepc-wmi.o
+obj-$(CONFIG_LG_LAPTOP)                += lg-laptop.o
 obj-$(CONFIG_MSI_LAPTOP)       += msi-laptop.o
 obj-$(CONFIG_ACPI_CMPC)                += classmate-laptop.o
 obj-$(CONFIG_COMPAL_LAPTOP)    += compal-laptop.o
+obj-$(CONFIG_DCDBAS)           += dcdbas.o
 obj-$(CONFIG_DELL_SMBIOS)      += dell-smbios.o
 dell-smbios-objs               := dell-smbios-base.o
 dell-smbios-$(CONFIG_DELL_SMBIOS_WMI)  += dell-smbios-wmi.o
@@ -23,6 +25,7 @@ obj-$(CONFIG_DELL_WMI_AIO)    += dell-wmi-aio.o
 obj-$(CONFIG_DELL_WMI_LED)     += dell-wmi-led.o
 obj-$(CONFIG_DELL_SMO8800)     += dell-smo8800.o
 obj-$(CONFIG_DELL_RBTN)                += dell-rbtn.o
+obj-$(CONFIG_DELL_RBU)          += dell_rbu.o
 obj-$(CONFIG_ACER_WMI)         += acer-wmi.o
 obj-$(CONFIG_ACER_WIRELESS)    += acer-wireless.o
 obj-$(CONFIG_ACERHDF)          += acerhdf.o
@@ -92,3 +95,4 @@ obj-$(CONFIG_MLX_PLATFORM)    += mlx-platform.o
 obj-$(CONFIG_INTEL_TURBO_MAX_3) += intel_turbo_max_3.o
 obj-$(CONFIG_INTEL_CHTDC_TI_PWRBTN)    += intel_chtdc_ti_pwrbtn.o
 obj-$(CONFIG_I2C_MULTI_INSTANTIATE)    += i2c-multi-instantiate.o
+obj-$(CONFIG_INTEL_ATOMISP2_PM)        += intel_atomisp2_pm.o
index ea22591ee66feb9c08d8a831cbb822d8b9ab7591..50522422537886c26c849af01b18694be049387b 100644 (file)
@@ -86,6 +86,7 @@ static unsigned int interval = 10;
 static unsigned int fanon = 60000;
 static unsigned int fanoff = 53000;
 static unsigned int verbose;
+static unsigned int list_supported;
 static unsigned int fanstate = ACERHDF_FAN_AUTO;
 static char force_bios[16];
 static char force_product[16];
@@ -104,10 +105,12 @@ module_param(fanoff, uint, 0600);
 MODULE_PARM_DESC(fanoff, "Turn the fan off below this temperature");
 module_param(verbose, uint, 0600);
 MODULE_PARM_DESC(verbose, "Enable verbose dmesg output");
+module_param(list_supported, uint, 0600);
+MODULE_PARM_DESC(list_supported, "List supported models and BIOS versions");
 module_param_string(force_bios, force_bios, 16, 0);
-MODULE_PARM_DESC(force_bios, "Force BIOS version and omit BIOS check");
+MODULE_PARM_DESC(force_bios, "Pretend system has this known supported BIOS version");
 module_param_string(force_product, force_product, 16, 0);
-MODULE_PARM_DESC(force_product, "Force BIOS product and omit BIOS check");
+MODULE_PARM_DESC(force_product, "Pretend system is this known supported model");
 
 /*
  * cmd_off: to switch the fan completely off and check if the fan is off
@@ -130,7 +133,7 @@ static const struct manualcmd mcmd = {
        .moff = 0xff,
 };
 
-/* BIOS settings */
+/* BIOS settings - only used during probe */
 struct bios_settings {
        const char *vendor;
        const char *product;
@@ -141,8 +144,18 @@ struct bios_settings {
        int mcmd_enable;
 };
 
+/* This could be a daughter struct in the above, but not worth the redirect */
+struct ctrl_settings {
+       u8 fanreg;
+       u8 tempreg;
+       struct fancmd cmd;
+       int mcmd_enable;
+};
+
+static struct ctrl_settings ctrl_cfg __read_mostly;
+
 /* Register addresses and values for different BIOS versions */
-static const struct bios_settings bios_tbl[] = {
+static const struct bios_settings bios_tbl[] __initconst = {
        /* AOA110 */
        {"Acer", "AOA110", "v0.3109", 0x55, 0x58, {0x1f, 0x00}, 0},
        {"Acer", "AOA110", "v0.3114", 0x55, 0x58, {0x1f, 0x00}, 0},
@@ -233,6 +246,7 @@ static const struct bios_settings bios_tbl[] = {
        {"Gateway", "LT31",   "v1.3201",  0x55, 0x58, {0x9e, 0x00}, 0},
        {"Gateway", "LT31",   "v1.3302",  0x55, 0x58, {0x9e, 0x00}, 0},
        {"Gateway", "LT31",   "v1.3303t", 0x55, 0x58, {0x9e, 0x00}, 0},
+       {"Gateway", "LT31",   "v1.3307",  0x55, 0x58, {0x9e, 0x00}, 0},
        /* Packard Bell */
        {"Packard Bell", "DOA150",  "v0.3104",  0x55, 0x58, {0x21, 0x00}, 0},
        {"Packard Bell", "DOA150",  "v0.3105",  0x55, 0x58, {0x20, 0x00}, 0},
@@ -256,8 +270,6 @@ static const struct bios_settings bios_tbl[] = {
        {"", "", "", 0, 0, {0, 0}, 0}
 };
 
-static const struct bios_settings *bios_cfg __read_mostly;
-
 /*
  * this struct is used to instruct thermal layer to use bang_bang instead of
  * default governor for acerhdf
@@ -270,7 +282,7 @@ static int acerhdf_get_temp(int *temp)
 {
        u8 read_temp;
 
-       if (ec_read(bios_cfg->tempreg, &read_temp))
+       if (ec_read(ctrl_cfg.tempreg, &read_temp))
                return -EINVAL;
 
        *temp = read_temp * 1000;
@@ -282,10 +294,10 @@ static int acerhdf_get_fanstate(int *state)
 {
        u8 fan;
 
-       if (ec_read(bios_cfg->fanreg, &fan))
+       if (ec_read(ctrl_cfg.fanreg, &fan))
                return -EINVAL;
 
-       if (fan != bios_cfg->cmd.cmd_off)
+       if (fan != ctrl_cfg.cmd.cmd_off)
                *state = ACERHDF_FAN_AUTO;
        else
                *state = ACERHDF_FAN_OFF;
@@ -306,13 +318,13 @@ static void acerhdf_change_fanstate(int state)
                state = ACERHDF_FAN_AUTO;
        }
 
-       cmd = (state == ACERHDF_FAN_OFF) ? bios_cfg->cmd.cmd_off
-                                        : bios_cfg->cmd.cmd_auto;
+       cmd = (state == ACERHDF_FAN_OFF) ? ctrl_cfg.cmd.cmd_off
+                                        : ctrl_cfg.cmd.cmd_auto;
        fanstate = state;
 
-       ec_write(bios_cfg->fanreg, cmd);
+       ec_write(ctrl_cfg.fanreg, cmd);
 
-       if (bios_cfg->mcmd_enable && state == ACERHDF_FAN_OFF) {
+       if (ctrl_cfg.mcmd_enable && state == ACERHDF_FAN_OFF) {
                if (verbose)
                        pr_notice("turning off fan manually\n");
                ec_write(mcmd.mreg, mcmd.moff);
@@ -615,10 +627,11 @@ static int str_starts_with(const char *str, const char *start)
 }
 
 /* check hardware */
-static int acerhdf_check_hardware(void)
+static int __init acerhdf_check_hardware(void)
 {
        char const *vendor, *version, *product;
        const struct bios_settings *bt = NULL;
+       int found = 0;
 
        /* get BIOS data */
        vendor  = dmi_get_system_info(DMI_SYS_VENDOR);
@@ -632,6 +645,17 @@ static int acerhdf_check_hardware(void)
 
        pr_info("Acer Aspire One Fan driver, v.%s\n", DRV_VER);
 
+       if (list_supported) {
+               pr_info("List of supported Manufacturer/Model/BIOS:\n");
+               pr_info("---------------------------------------------------\n");
+               for (bt = bios_tbl; bt->vendor[0]; bt++) {
+                       pr_info("%-13s | %-17s | %-10s\n", bt->vendor,
+                               bt->product, bt->version);
+               }
+               pr_info("---------------------------------------------------\n");
+               return -ECANCELED;
+       }
+
        if (force_bios[0]) {
                version = force_bios;
                pr_info("forcing BIOS version: %s\n", version);
@@ -657,30 +681,36 @@ static int acerhdf_check_hardware(void)
                if (str_starts_with(vendor, bt->vendor) &&
                                str_starts_with(product, bt->product) &&
                                str_starts_with(version, bt->version)) {
-                       bios_cfg = bt;
+                       found = 1;
                        break;
                }
        }
 
-       if (!bios_cfg) {
+       if (!found) {
                pr_err("unknown (unsupported) BIOS version %s/%s/%s, please report, aborting!\n",
                       vendor, product, version);
                return -EINVAL;
        }
 
+       /* Copy control settings from BIOS table before we free it. */
+       ctrl_cfg.fanreg = bt->fanreg;
+       ctrl_cfg.tempreg = bt->tempreg;
+       memcpy(&ctrl_cfg.cmd, &bt->cmd, sizeof(struct fancmd));
+       ctrl_cfg.mcmd_enable = bt->mcmd_enable;
+
        /*
         * if started with kernel mode off, prevent the kernel from switching
         * off the fan
         */
        if (!kernelmode) {
                pr_notice("Fan control off, to enable do:\n");
-               pr_notice("echo -n \"enabled\" > /sys/class/thermal/thermal_zone0/mode\n");
+               pr_notice("echo -n \"enabled\" > /sys/class/thermal/thermal_zoneN/mode # N=0,1,2...\n");
        }
 
        return 0;
 }
 
-static int acerhdf_register_platform(void)
+static int __init acerhdf_register_platform(void)
 {
        int err = 0;
 
@@ -712,7 +742,7 @@ static void acerhdf_unregister_platform(void)
        platform_driver_unregister(&acerhdf_driver);
 }
 
-static int acerhdf_register_thermal(void)
+static int __init acerhdf_register_thermal(void)
 {
        cl_dev = thermal_cooling_device_register("acerhdf-fan", NULL,
                                                 &acerhdf_cooling_ops);
index 93ee2d5466f8092978f57bf74ca84ee9c3cc1cc8..c285a16675ee7d912280028bf5f94c552fad6371 100644 (file)
@@ -43,6 +43,7 @@
 #include <linux/hwmon-sysfs.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <linux/platform_data/x86/asus-wmi.h>
 #include <linux/platform_device.h>
 #include <linux/thermal.h>
 #include <linux/acpi.h>
@@ -69,89 +70,6 @@ MODULE_LICENSE("GPL");
 #define NOTIFY_KBD_BRTDWN              0xc5
 #define NOTIFY_KBD_BRTTOGGLE           0xc7
 
-/* WMI Methods */
-#define ASUS_WMI_METHODID_SPEC         0x43455053 /* BIOS SPECification */
-#define ASUS_WMI_METHODID_SFBD         0x44424653 /* Set First Boot Device */
-#define ASUS_WMI_METHODID_GLCD         0x44434C47 /* Get LCD status */
-#define ASUS_WMI_METHODID_GPID         0x44495047 /* Get Panel ID?? (Resol) */
-#define ASUS_WMI_METHODID_QMOD         0x444F4D51 /* Quiet MODe */
-#define ASUS_WMI_METHODID_SPLV         0x4C425053 /* Set Panel Light Value */
-#define ASUS_WMI_METHODID_AGFN         0x4E464741 /* FaN? */
-#define ASUS_WMI_METHODID_SFUN         0x4E554653 /* FUNCtionalities */
-#define ASUS_WMI_METHODID_SDSP         0x50534453 /* Set DiSPlay output */
-#define ASUS_WMI_METHODID_GDSP         0x50534447 /* Get DiSPlay output */
-#define ASUS_WMI_METHODID_DEVP         0x50564544 /* DEVice Policy */
-#define ASUS_WMI_METHODID_OSVR         0x5256534F /* OS VeRsion */
-#define ASUS_WMI_METHODID_DSTS         0x53544344 /* Device STatuS */
-#define ASUS_WMI_METHODID_DSTS2                0x53545344 /* Device STatuS #2*/
-#define ASUS_WMI_METHODID_BSTS         0x53545342 /* Bios STatuS ? */
-#define ASUS_WMI_METHODID_DEVS         0x53564544 /* DEVice Set */
-#define ASUS_WMI_METHODID_CFVS         0x53564643 /* CPU Frequency Volt Set */
-#define ASUS_WMI_METHODID_KBFT         0x5446424B /* KeyBoard FilTer */
-#define ASUS_WMI_METHODID_INIT         0x54494E49 /* INITialize */
-#define ASUS_WMI_METHODID_HKEY         0x59454B48 /* Hot KEY ?? */
-
-#define ASUS_WMI_UNSUPPORTED_METHOD    0xFFFFFFFE
-
-/* Wireless */
-#define ASUS_WMI_DEVID_HW_SWITCH       0x00010001
-#define ASUS_WMI_DEVID_WIRELESS_LED    0x00010002
-#define ASUS_WMI_DEVID_CWAP            0x00010003
-#define ASUS_WMI_DEVID_WLAN            0x00010011
-#define ASUS_WMI_DEVID_WLAN_LED                0x00010012
-#define ASUS_WMI_DEVID_BLUETOOTH       0x00010013
-#define ASUS_WMI_DEVID_GPS             0x00010015
-#define ASUS_WMI_DEVID_WIMAX           0x00010017
-#define ASUS_WMI_DEVID_WWAN3G          0x00010019
-#define ASUS_WMI_DEVID_UWB             0x00010021
-
-/* Leds */
-/* 0x000200XX and 0x000400XX */
-#define ASUS_WMI_DEVID_LED1            0x00020011
-#define ASUS_WMI_DEVID_LED2            0x00020012
-#define ASUS_WMI_DEVID_LED3            0x00020013
-#define ASUS_WMI_DEVID_LED4            0x00020014
-#define ASUS_WMI_DEVID_LED5            0x00020015
-#define ASUS_WMI_DEVID_LED6            0x00020016
-
-/* Backlight and Brightness */
-#define ASUS_WMI_DEVID_ALS_ENABLE      0x00050001 /* Ambient Light Sensor */
-#define ASUS_WMI_DEVID_BACKLIGHT       0x00050011
-#define ASUS_WMI_DEVID_BRIGHTNESS      0x00050012
-#define ASUS_WMI_DEVID_KBD_BACKLIGHT   0x00050021
-#define ASUS_WMI_DEVID_LIGHT_SENSOR    0x00050022 /* ?? */
-#define ASUS_WMI_DEVID_LIGHTBAR                0x00050025
-
-/* Misc */
-#define ASUS_WMI_DEVID_CAMERA          0x00060013
-
-/* Storage */
-#define ASUS_WMI_DEVID_CARDREADER      0x00080013
-
-/* Input */
-#define ASUS_WMI_DEVID_TOUCHPAD                0x00100011
-#define ASUS_WMI_DEVID_TOUCHPAD_LED    0x00100012
-
-/* Fan, Thermal */
-#define ASUS_WMI_DEVID_THERMAL_CTRL    0x00110011
-#define ASUS_WMI_DEVID_FAN_CTRL                0x00110012
-
-/* Power */
-#define ASUS_WMI_DEVID_PROCESSOR_STATE 0x00120012
-
-/* Deep S3 / Resume on LID open */
-#define ASUS_WMI_DEVID_LID_RESUME      0x00120031
-
-/* DSTS masks */
-#define ASUS_WMI_DSTS_STATUS_BIT       0x00000001
-#define ASUS_WMI_DSTS_UNKNOWN_BIT      0x00000002
-#define ASUS_WMI_DSTS_PRESENCE_BIT     0x00010000
-#define ASUS_WMI_DSTS_USER_BIT         0x00020000
-#define ASUS_WMI_DSTS_BIOS_BIT         0x00040000
-#define ASUS_WMI_DSTS_BRIGHTNESS_MASK  0x000000FF
-#define ASUS_WMI_DSTS_MAX_BRIGTH_MASK  0x0000FF00
-#define ASUS_WMI_DSTS_LIGHTBAR_MASK    0x0000000F
-
 #define ASUS_FAN_DESC                  "cpu_fan"
 #define ASUS_FAN_MFUN                  0x13
 #define ASUS_FAN_SFUN_READ             0x06
@@ -239,7 +157,6 @@ struct asus_wmi {
        int lightbar_led_wk;
        struct workqueue_struct *led_workqueue;
        struct work_struct tpd_led_work;
-       struct work_struct kbd_led_work;
        struct work_struct wlan_led_work;
        struct work_struct lightbar_led_work;
 
@@ -302,8 +219,7 @@ static void asus_wmi_input_exit(struct asus_wmi *asus)
        asus->inputdev = NULL;
 }
 
-static int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1,
-                                   u32 *retval)
+int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1, u32 *retval)
 {
        struct bios_args args = {
                .arg0 = arg0,
@@ -339,6 +255,7 @@ exit:
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(asus_wmi_evaluate_method);
 
 static int asus_wmi_evaluate_method_agfn(const struct acpi_buffer args)
 {
@@ -456,12 +373,9 @@ static enum led_brightness tpd_led_get(struct led_classdev *led_cdev)
        return read_tpd_led_state(asus);
 }
 
-static void kbd_led_update(struct work_struct *work)
+static void kbd_led_update(struct asus_wmi *asus)
 {
        int ctrl_param = 0;
-       struct asus_wmi *asus;
-
-       asus = container_of(work, struct asus_wmi, kbd_led_work);
 
        /*
         * bits 0-2: level
@@ -471,7 +385,6 @@ static void kbd_led_update(struct work_struct *work)
                ctrl_param = 0x80 | (asus->kbd_led_wk & 0x7F);
 
        asus_wmi_set_devstate(ASUS_WMI_DEVID_KBD_BACKLIGHT, ctrl_param, NULL);
-       led_classdev_notify_brightness_hw_changed(&asus->kbd_led, asus->kbd_led_wk);
 }
 
 static int kbd_led_read(struct asus_wmi *asus, int *level, int *env)
@@ -516,7 +429,7 @@ static void do_kbd_led_set(struct led_classdev *led_cdev, int value)
                value = 0;
 
        asus->kbd_led_wk = value;
-       queue_work(asus->led_workqueue, &asus->kbd_led_work);
+       kbd_led_update(asus);
 }
 
 static void kbd_led_set(struct led_classdev *led_cdev,
@@ -525,6 +438,14 @@ static void kbd_led_set(struct led_classdev *led_cdev,
        do_kbd_led_set(led_cdev, value);
 }
 
+static void kbd_led_set_by_kbd(struct asus_wmi *asus, enum led_brightness value)
+{
+       struct led_classdev *led_cdev = &asus->kbd_led;
+
+       do_kbd_led_set(led_cdev, value);
+       led_classdev_notify_brightness_hw_changed(led_cdev, asus->kbd_led_wk);
+}
+
 static enum led_brightness kbd_led_get(struct led_classdev *led_cdev)
 {
        struct asus_wmi *asus;
@@ -671,8 +592,6 @@ static int asus_wmi_led_init(struct asus_wmi *asus)
 
        led_val = kbd_led_read(asus, NULL, NULL);
        if (led_val >= 0) {
-               INIT_WORK(&asus->kbd_led_work, kbd_led_update);
-
                asus->kbd_led_wk = led_val;
                asus->kbd_led.name = "asus::kbd_backlight";
                asus->kbd_led.flags = LED_BRIGHT_HW_CHANGED;
@@ -1746,18 +1665,18 @@ static void asus_wmi_notify(u32 value, void *context)
        }
 
        if (code == NOTIFY_KBD_BRTUP) {
-               do_kbd_led_set(&asus->kbd_led, asus->kbd_led_wk + 1);
+               kbd_led_set_by_kbd(asus, asus->kbd_led_wk + 1);
                goto exit;
        }
        if (code == NOTIFY_KBD_BRTDWN) {
-               do_kbd_led_set(&asus->kbd_led, asus->kbd_led_wk - 1);
+               kbd_led_set_by_kbd(asus, asus->kbd_led_wk - 1);
                goto exit;
        }
        if (code == NOTIFY_KBD_BRTTOGGLE) {
                if (asus->kbd_led_wk == asus->kbd_led.max_brightness)
-                       do_kbd_led_set(&asus->kbd_led, 0);
+                       kbd_led_set_by_kbd(asus, 0);
                else
-                       do_kbd_led_set(&asus->kbd_led, asus->kbd_led_wk + 1);
+                       kbd_led_set_by_kbd(asus, asus->kbd_led_wk + 1);
                goto exit;
        }
 
@@ -2291,7 +2210,7 @@ static int asus_hotk_resume(struct device *device)
        struct asus_wmi *asus = dev_get_drvdata(device);
 
        if (!IS_ERR_OR_NULL(asus->kbd_led.dev))
-               queue_work(asus->led_workqueue, &asus->kbd_led_work);
+               kbd_led_update(asus);
 
        return 0;
 }
@@ -2327,7 +2246,7 @@ static int asus_hotk_restore(struct device *device)
                rfkill_set_sw_state(asus->uwb.rfkill, bl);
        }
        if (!IS_ERR_OR_NULL(asus->kbd_led.dev))
-               queue_work(asus->led_workqueue, &asus->kbd_led_work);
+               kbd_led_update(asus);
 
        return 0;
 }
diff --git a/drivers/platform/x86/dcdbas.c b/drivers/platform/x86/dcdbas.c
new file mode 100644 (file)
index 0000000..88bd7ef
--- /dev/null
@@ -0,0 +1,761 @@
+/*
+ *  dcdbas.c: Dell Systems Management Base Driver
+ *
+ *  The Dell Systems Management Base Driver provides a sysfs interface for
+ *  systems management software to perform System Management Interrupts (SMIs)
+ *  and Host Control Actions (power cycle or power off after OS shutdown) on
+ *  Dell systems.
+ *
+ *  See Documentation/dcdbas.txt for more information.
+ *
+ *  Copyright (C) 1995-2006 Dell Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License v2.0 as published by
+ *  the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+#include <linux/dma-mapping.h>
+#include <linux/errno.h>
+#include <linux/cpu.h>
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mc146818rtc.h>
+#include <linux/module.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+
+#include "dcdbas.h"
+
+#define DRIVER_NAME            "dcdbas"
+#define DRIVER_VERSION         "5.6.0-3.3"
+#define DRIVER_DESCRIPTION     "Dell Systems Management Base Driver"
+
+static struct platform_device *dcdbas_pdev;
+
+static u8 *smi_data_buf;
+static dma_addr_t smi_data_buf_handle;
+static unsigned long smi_data_buf_size;
+static unsigned long max_smi_data_buf_size = MAX_SMI_DATA_BUF_SIZE;
+static u32 smi_data_buf_phys_addr;
+static DEFINE_MUTEX(smi_data_lock);
+static u8 *eps_buffer;
+
+static unsigned int host_control_action;
+static unsigned int host_control_smi_type;
+static unsigned int host_control_on_shutdown;
+
+static bool wsmt_enabled;
+
+/**
+ * smi_data_buf_free: free SMI data buffer
+ */
+static void smi_data_buf_free(void)
+{
+       if (!smi_data_buf || wsmt_enabled)
+               return;
+
+       dev_dbg(&dcdbas_pdev->dev, "%s: phys: %x size: %lu\n",
+               __func__, smi_data_buf_phys_addr, smi_data_buf_size);
+
+       dma_free_coherent(&dcdbas_pdev->dev, smi_data_buf_size, smi_data_buf,
+                         smi_data_buf_handle);
+       smi_data_buf = NULL;
+       smi_data_buf_handle = 0;
+       smi_data_buf_phys_addr = 0;
+       smi_data_buf_size = 0;
+}
+
+/**
+ * smi_data_buf_realloc: grow SMI data buffer if needed
+ */
+static int smi_data_buf_realloc(unsigned long size)
+{
+       void *buf;
+       dma_addr_t handle;
+
+       if (smi_data_buf_size >= size)
+               return 0;
+
+       if (size > max_smi_data_buf_size)
+               return -EINVAL;
+
+       /* new buffer is needed */
+       buf = dma_alloc_coherent(&dcdbas_pdev->dev, size, &handle, GFP_KERNEL);
+       if (!buf) {
+               dev_dbg(&dcdbas_pdev->dev,
+                       "%s: failed to allocate memory size %lu\n",
+                       __func__, size);
+               return -ENOMEM;
+       }
+       /* memory zeroed by dma_alloc_coherent */
+
+       if (smi_data_buf)
+               memcpy(buf, smi_data_buf, smi_data_buf_size);
+
+       /* free any existing buffer */
+       smi_data_buf_free();
+
+       /* set up new buffer for use */
+       smi_data_buf = buf;
+       smi_data_buf_handle = handle;
+       smi_data_buf_phys_addr = (u32) virt_to_phys(buf);
+       smi_data_buf_size = size;
+
+       dev_dbg(&dcdbas_pdev->dev, "%s: phys: %x size: %lu\n",
+               __func__, smi_data_buf_phys_addr, smi_data_buf_size);
+
+       return 0;
+}
+
+static ssize_t smi_data_buf_phys_addr_show(struct device *dev,
+                                          struct device_attribute *attr,
+                                          char *buf)
+{
+       return sprintf(buf, "%x\n", smi_data_buf_phys_addr);
+}
+
+static ssize_t smi_data_buf_size_show(struct device *dev,
+                                     struct device_attribute *attr,
+                                     char *buf)
+{
+       return sprintf(buf, "%lu\n", smi_data_buf_size);
+}
+
+static ssize_t smi_data_buf_size_store(struct device *dev,
+                                      struct device_attribute *attr,
+                                      const char *buf, size_t count)
+{
+       unsigned long buf_size;
+       ssize_t ret;
+
+       buf_size = simple_strtoul(buf, NULL, 10);
+
+       /* make sure SMI data buffer is at least buf_size */
+       mutex_lock(&smi_data_lock);
+       ret = smi_data_buf_realloc(buf_size);
+       mutex_unlock(&smi_data_lock);
+       if (ret)
+               return ret;
+
+       return count;
+}
+
+static ssize_t smi_data_read(struct file *filp, struct kobject *kobj,
+                            struct bin_attribute *bin_attr,
+                            char *buf, loff_t pos, size_t count)
+{
+       ssize_t ret;
+
+       mutex_lock(&smi_data_lock);
+       ret = memory_read_from_buffer(buf, count, &pos, smi_data_buf,
+                                       smi_data_buf_size);
+       mutex_unlock(&smi_data_lock);
+       return ret;
+}
+
+static ssize_t smi_data_write(struct file *filp, struct kobject *kobj,
+                             struct bin_attribute *bin_attr,
+                             char *buf, loff_t pos, size_t count)
+{
+       ssize_t ret;
+
+       if ((pos + count) > max_smi_data_buf_size)
+               return -EINVAL;
+
+       mutex_lock(&smi_data_lock);
+
+       ret = smi_data_buf_realloc(pos + count);
+       if (ret)
+               goto out;
+
+       memcpy(smi_data_buf + pos, buf, count);
+       ret = count;
+out:
+       mutex_unlock(&smi_data_lock);
+       return ret;
+}
+
+static ssize_t host_control_action_show(struct device *dev,
+                                       struct device_attribute *attr,
+                                       char *buf)
+{
+       return sprintf(buf, "%u\n", host_control_action);
+}
+
+static ssize_t host_control_action_store(struct device *dev,
+                                        struct device_attribute *attr,
+                                        const char *buf, size_t count)
+{
+       ssize_t ret;
+
+       /* make sure buffer is available for host control command */
+       mutex_lock(&smi_data_lock);
+       ret = smi_data_buf_realloc(sizeof(struct apm_cmd));
+       mutex_unlock(&smi_data_lock);
+       if (ret)
+               return ret;
+
+       host_control_action = simple_strtoul(buf, NULL, 10);
+       return count;
+}
+
+static ssize_t host_control_smi_type_show(struct device *dev,
+                                         struct device_attribute *attr,
+                                         char *buf)
+{
+       return sprintf(buf, "%u\n", host_control_smi_type);
+}
+
+static ssize_t host_control_smi_type_store(struct device *dev,
+                                          struct device_attribute *attr,
+                                          const char *buf, size_t count)
+{
+       host_control_smi_type = simple_strtoul(buf, NULL, 10);
+       return count;
+}
+
+static ssize_t host_control_on_shutdown_show(struct device *dev,
+                                            struct device_attribute *attr,
+                                            char *buf)
+{
+       return sprintf(buf, "%u\n", host_control_on_shutdown);
+}
+
+static ssize_t host_control_on_shutdown_store(struct device *dev,
+                                             struct device_attribute *attr,
+                                             const char *buf, size_t count)
+{
+       host_control_on_shutdown = simple_strtoul(buf, NULL, 10);
+       return count;
+}
+
+static int raise_smi(void *par)
+{
+       struct smi_cmd *smi_cmd = par;
+
+       if (smp_processor_id() != 0) {
+               dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n",
+                       __func__);
+               return -EBUSY;
+       }
+
+       /* generate SMI */
+       /* inb to force posted write through and make SMI happen now */
+       asm volatile (
+               "outb %b0,%w1\n"
+               "inb %w1"
+               : /* no output args */
+               : "a" (smi_cmd->command_code),
+                 "d" (smi_cmd->command_address),
+                 "b" (smi_cmd->ebx),
+                 "c" (smi_cmd->ecx)
+               : "memory"
+       );
+
+       return 0;
+}
+/**
+ * dcdbas_smi_request: generate SMI request
+ *
+ * Called with smi_data_lock.
+ */
+int dcdbas_smi_request(struct smi_cmd *smi_cmd)
+{
+       int ret;
+
+       if (smi_cmd->magic != SMI_CMD_MAGIC) {
+               dev_info(&dcdbas_pdev->dev, "%s: invalid magic value\n",
+                        __func__);
+               return -EBADR;
+       }
+
+       /* SMI requires CPU 0 */
+       get_online_cpus();
+       ret = smp_call_on_cpu(0, raise_smi, smi_cmd, true);
+       put_online_cpus();
+
+       return ret;
+}
+
+/**
+ * smi_request_store:
+ *
+ * The valid values are:
+ * 0: zero SMI data buffer
+ * 1: generate calling interface SMI
+ * 2: generate raw SMI
+ *
+ * User application writes smi_cmd to smi_data before telling driver
+ * to generate SMI.
+ */
+static ssize_t smi_request_store(struct device *dev,
+                                struct device_attribute *attr,
+                                const char *buf, size_t count)
+{
+       struct smi_cmd *smi_cmd;
+       unsigned long val = simple_strtoul(buf, NULL, 10);
+       ssize_t ret;
+
+       mutex_lock(&smi_data_lock);
+
+       if (smi_data_buf_size < sizeof(struct smi_cmd)) {
+               ret = -ENODEV;
+               goto out;
+       }
+       smi_cmd = (struct smi_cmd *)smi_data_buf;
+
+       switch (val) {
+       case 2:
+               /* Raw SMI */
+               ret = dcdbas_smi_request(smi_cmd);
+               if (!ret)
+                       ret = count;
+               break;
+       case 1:
+               /*
+                * Calling Interface SMI
+                *
+                * Provide physical address of command buffer field within
+                * the struct smi_cmd to BIOS.
+                *
+                * Because the address that smi_cmd (smi_data_buf) points to
+                * will be from memremap() of a non-memory address if WSMT
+                * is present, we can't use virt_to_phys() on smi_cmd, so
+                * we have to use the physical address that was saved when
+                * the virtual address for smi_cmd was received.
+                */
+               smi_cmd->ebx = smi_data_buf_phys_addr +
+                               offsetof(struct smi_cmd, command_buffer);
+               ret = dcdbas_smi_request(smi_cmd);
+               if (!ret)
+                       ret = count;
+               break;
+       case 0:
+               memset(smi_data_buf, 0, smi_data_buf_size);
+               ret = count;
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+
+out:
+       mutex_unlock(&smi_data_lock);
+       return ret;
+}
+EXPORT_SYMBOL(dcdbas_smi_request);
+
+/**
+ * host_control_smi: generate host control SMI
+ *
+ * Caller must set up the host control command in smi_data_buf.
+ */
+static int host_control_smi(void)
+{
+       struct apm_cmd *apm_cmd;
+       u8 *data;
+       unsigned long flags;
+       u32 num_ticks;
+       s8 cmd_status;
+       u8 index;
+
+       apm_cmd = (struct apm_cmd *)smi_data_buf;
+       apm_cmd->status = ESM_STATUS_CMD_UNSUCCESSFUL;
+
+       switch (host_control_smi_type) {
+       case HC_SMITYPE_TYPE1:
+               spin_lock_irqsave(&rtc_lock, flags);
+               /* write SMI data buffer physical address */
+               data = (u8 *)&smi_data_buf_phys_addr;
+               for (index = PE1300_CMOS_CMD_STRUCT_PTR;
+                    index < (PE1300_CMOS_CMD_STRUCT_PTR + 4);
+                    index++, data++) {
+                       outb(index,
+                            (CMOS_BASE_PORT + CMOS_PAGE2_INDEX_PORT_PIIX4));
+                       outb(*data,
+                            (CMOS_BASE_PORT + CMOS_PAGE2_DATA_PORT_PIIX4));
+               }
+
+               /* first set status to -1 as called by spec */
+               cmd_status = ESM_STATUS_CMD_UNSUCCESSFUL;
+               outb((u8) cmd_status, PCAT_APM_STATUS_PORT);
+
+               /* generate SMM call */
+               outb(ESM_APM_CMD, PCAT_APM_CONTROL_PORT);
+               spin_unlock_irqrestore(&rtc_lock, flags);
+
+               /* wait a few to see if it executed */
+               num_ticks = TIMEOUT_USEC_SHORT_SEMA_BLOCKING;
+               while ((cmd_status = inb(PCAT_APM_STATUS_PORT))
+                      == ESM_STATUS_CMD_UNSUCCESSFUL) {
+                       num_ticks--;
+                       if (num_ticks == EXPIRED_TIMER)
+                               return -ETIME;
+               }
+               break;
+
+       case HC_SMITYPE_TYPE2:
+       case HC_SMITYPE_TYPE3:
+               spin_lock_irqsave(&rtc_lock, flags);
+               /* write SMI data buffer physical address */
+               data = (u8 *)&smi_data_buf_phys_addr;
+               for (index = PE1400_CMOS_CMD_STRUCT_PTR;
+                    index < (PE1400_CMOS_CMD_STRUCT_PTR + 4);
+                    index++, data++) {
+                       outb(index, (CMOS_BASE_PORT + CMOS_PAGE1_INDEX_PORT));
+                       outb(*data, (CMOS_BASE_PORT + CMOS_PAGE1_DATA_PORT));
+               }
+
+               /* generate SMM call */
+               if (host_control_smi_type == HC_SMITYPE_TYPE3)
+                       outb(ESM_APM_CMD, PCAT_APM_CONTROL_PORT);
+               else
+                       outb(ESM_APM_CMD, PE1400_APM_CONTROL_PORT);
+
+               /* restore RTC index pointer since it was written to above */
+               CMOS_READ(RTC_REG_C);
+               spin_unlock_irqrestore(&rtc_lock, flags);
+
+               /* read control port back to serialize write */
+               cmd_status = inb(PE1400_APM_CONTROL_PORT);
+
+               /* wait a few to see if it executed */
+               num_ticks = TIMEOUT_USEC_SHORT_SEMA_BLOCKING;
+               while (apm_cmd->status == ESM_STATUS_CMD_UNSUCCESSFUL) {
+                       num_ticks--;
+                       if (num_ticks == EXPIRED_TIMER)
+                               return -ETIME;
+               }
+               break;
+
+       default:
+               dev_dbg(&dcdbas_pdev->dev, "%s: invalid SMI type %u\n",
+                       __func__, host_control_smi_type);
+               return -ENOSYS;
+       }
+
+       return 0;
+}
+
+/**
+ * dcdbas_host_control: initiate host control
+ *
+ * This function is called by the driver after the system has
+ * finished shutting down if the user application specified a
+ * host control action to perform on shutdown.  It is safe to
+ * use smi_data_buf at this point because the system has finished
+ * shutting down and no userspace apps are running.
+ */
+static void dcdbas_host_control(void)
+{
+       struct apm_cmd *apm_cmd;
+       u8 action;
+
+       if (host_control_action == HC_ACTION_NONE)
+               return;
+
+       action = host_control_action;
+       host_control_action = HC_ACTION_NONE;
+
+       if (!smi_data_buf) {
+               dev_dbg(&dcdbas_pdev->dev, "%s: no SMI buffer\n", __func__);
+               return;
+       }
+
+       if (smi_data_buf_size < sizeof(struct apm_cmd)) {
+               dev_dbg(&dcdbas_pdev->dev, "%s: SMI buffer too small\n",
+                       __func__);
+               return;
+       }
+
+       apm_cmd = (struct apm_cmd *)smi_data_buf;
+
+       /* power off takes precedence */
+       if (action & HC_ACTION_HOST_CONTROL_POWEROFF) {
+               apm_cmd->command = ESM_APM_POWER_CYCLE;
+               apm_cmd->reserved = 0;
+               *((s16 *)&apm_cmd->parameters.shortreq.parm[0]) = (s16) 0;
+               host_control_smi();
+       } else if (action & HC_ACTION_HOST_CONTROL_POWERCYCLE) {
+               apm_cmd->command = ESM_APM_POWER_CYCLE;
+               apm_cmd->reserved = 0;
+               *((s16 *)&apm_cmd->parameters.shortreq.parm[0]) = (s16) 20;
+               host_control_smi();
+       }
+}
+
+/* WSMT */
+
+static u8 checksum(u8 *buffer, u8 length)
+{
+       u8 sum = 0;
+       u8 *end = buffer + length;
+
+       while (buffer < end)
+               sum += *buffer++;
+       return sum;
+}
+
+static inline struct smm_eps_table *check_eps_table(u8 *addr)
+{
+       struct smm_eps_table *eps = (struct smm_eps_table *)addr;
+
+       if (strncmp(eps->smm_comm_buff_anchor, SMM_EPS_SIG, 4) != 0)
+               return NULL;
+
+       if (checksum(addr, eps->length) != 0)
+               return NULL;
+
+       return eps;
+}
+
+static int dcdbas_check_wsmt(void)
+{
+       struct acpi_table_wsmt *wsmt = NULL;
+       struct smm_eps_table *eps = NULL;
+       u64 remap_size;
+       u8 *addr;
+
+       acpi_get_table(ACPI_SIG_WSMT, 0, (struct acpi_table_header **)&wsmt);
+       if (!wsmt)
+               return 0;
+
+       /* Check if WSMT ACPI table shows that protection is enabled */
+       if (!(wsmt->protection_flags & ACPI_WSMT_FIXED_COMM_BUFFERS) ||
+           !(wsmt->protection_flags & ACPI_WSMT_COMM_BUFFER_NESTED_PTR_PROTECTION))
+               return 0;
+
+       /* Scan for EPS (entry point structure) */
+       for (addr = (u8 *)__va(0xf0000);
+            addr < (u8 *)__va(0x100000 - sizeof(struct smm_eps_table));
+            addr += 16) {
+               eps = check_eps_table(addr);
+               if (eps)
+                       break;
+       }
+
+       if (!eps) {
+               dev_dbg(&dcdbas_pdev->dev, "found WSMT, but no EPS found\n");
+               return -ENODEV;
+       }
+
+       /*
+        * Get physical address of buffer and map to virtual address.
+        * Table gives size in 4K pages, regardless of actual system page size.
+        */
+       if (upper_32_bits(eps->smm_comm_buff_addr + 8)) {
+               dev_warn(&dcdbas_pdev->dev, "found WSMT, but EPS buffer address is above 4GB\n");
+               return -EINVAL;
+       }
+       /*
+        * Limit remap size to MAX_SMI_DATA_BUF_SIZE + 8 (since the first 8
+        * bytes are used for a semaphore, not the data buffer itself).
+        */
+       remap_size = eps->num_of_4k_pages * PAGE_SIZE;
+       if (remap_size > MAX_SMI_DATA_BUF_SIZE + 8)
+               remap_size = MAX_SMI_DATA_BUF_SIZE + 8;
+       eps_buffer = memremap(eps->smm_comm_buff_addr, remap_size, MEMREMAP_WB);
+       if (!eps_buffer) {
+               dev_warn(&dcdbas_pdev->dev, "found WSMT, but failed to map EPS buffer\n");
+               return -ENOMEM;
+       }
+
+       /* First 8 bytes is for a semaphore, not part of the smi_data_buf */
+       smi_data_buf_phys_addr = eps->smm_comm_buff_addr + 8;
+       smi_data_buf = eps_buffer + 8;
+       smi_data_buf_size = remap_size - 8;
+       max_smi_data_buf_size = smi_data_buf_size;
+       wsmt_enabled = true;
+       dev_info(&dcdbas_pdev->dev,
+                "WSMT found, using firmware-provided SMI buffer.\n");
+       return 1;
+}
+
+/**
+ * dcdbas_reboot_notify: handle reboot notification for host control
+ */
+static int dcdbas_reboot_notify(struct notifier_block *nb, unsigned long code,
+                               void *unused)
+{
+       switch (code) {
+       case SYS_DOWN:
+       case SYS_HALT:
+       case SYS_POWER_OFF:
+               if (host_control_on_shutdown) {
+                       /* firmware is going to perform host control action */
+                       printk(KERN_WARNING "Please wait for shutdown "
+                              "action to complete...\n");
+                       dcdbas_host_control();
+               }
+               break;
+       }
+
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block dcdbas_reboot_nb = {
+       .notifier_call = dcdbas_reboot_notify,
+       .next = NULL,
+       .priority = INT_MIN
+};
+
+static DCDBAS_BIN_ATTR_RW(smi_data);
+
+static struct bin_attribute *dcdbas_bin_attrs[] = {
+       &bin_attr_smi_data,
+       NULL
+};
+
+static DCDBAS_DEV_ATTR_RW(smi_data_buf_size);
+static DCDBAS_DEV_ATTR_RO(smi_data_buf_phys_addr);
+static DCDBAS_DEV_ATTR_WO(smi_request);
+static DCDBAS_DEV_ATTR_RW(host_control_action);
+static DCDBAS_DEV_ATTR_RW(host_control_smi_type);
+static DCDBAS_DEV_ATTR_RW(host_control_on_shutdown);
+
+static struct attribute *dcdbas_dev_attrs[] = {
+       &dev_attr_smi_data_buf_size.attr,
+       &dev_attr_smi_data_buf_phys_addr.attr,
+       &dev_attr_smi_request.attr,
+       &dev_attr_host_control_action.attr,
+       &dev_attr_host_control_smi_type.attr,
+       &dev_attr_host_control_on_shutdown.attr,
+       NULL
+};
+
+static const struct attribute_group dcdbas_attr_group = {
+       .attrs = dcdbas_dev_attrs,
+       .bin_attrs = dcdbas_bin_attrs,
+};
+
+static int dcdbas_probe(struct platform_device *dev)
+{
+       int error;
+
+       host_control_action = HC_ACTION_NONE;
+       host_control_smi_type = HC_SMITYPE_NONE;
+
+       dcdbas_pdev = dev;
+
+       /* Check if ACPI WSMT table specifies protected SMI buffer address */
+       error = dcdbas_check_wsmt();
+       if (error < 0)
+               return error;
+
+       /*
+        * BIOS SMI calls require buffer addresses be in 32-bit address space.
+        * This is done by setting the DMA mask below.
+        */
+       error = dma_set_coherent_mask(&dcdbas_pdev->dev, DMA_BIT_MASK(32));
+       if (error)
+               return error;
+
+       error = sysfs_create_group(&dev->dev.kobj, &dcdbas_attr_group);
+       if (error)
+               return error;
+
+       register_reboot_notifier(&dcdbas_reboot_nb);
+
+       dev_info(&dev->dev, "%s (version %s)\n",
+                DRIVER_DESCRIPTION, DRIVER_VERSION);
+
+       return 0;
+}
+
+static int dcdbas_remove(struct platform_device *dev)
+{
+       unregister_reboot_notifier(&dcdbas_reboot_nb);
+       sysfs_remove_group(&dev->dev.kobj, &dcdbas_attr_group);
+
+       return 0;
+}
+
+static struct platform_driver dcdbas_driver = {
+       .driver         = {
+               .name   = DRIVER_NAME,
+       },
+       .probe          = dcdbas_probe,
+       .remove         = dcdbas_remove,
+};
+
+static const struct platform_device_info dcdbas_dev_info __initconst = {
+       .name           = DRIVER_NAME,
+       .id             = -1,
+       .dma_mask       = DMA_BIT_MASK(32),
+};
+
+static struct platform_device *dcdbas_pdev_reg;
+
+/**
+ * dcdbas_init: initialize driver
+ */
+static int __init dcdbas_init(void)
+{
+       int error;
+
+       error = platform_driver_register(&dcdbas_driver);
+       if (error)
+               return error;
+
+       dcdbas_pdev_reg = platform_device_register_full(&dcdbas_dev_info);
+       if (IS_ERR(dcdbas_pdev_reg)) {
+               error = PTR_ERR(dcdbas_pdev_reg);
+               goto err_unregister_driver;
+       }
+
+       return 0;
+
+ err_unregister_driver:
+       platform_driver_unregister(&dcdbas_driver);
+       return error;
+}
+
+/**
+ * dcdbas_exit: perform driver cleanup
+ */
+static void __exit dcdbas_exit(void)
+{
+       /*
+        * make sure functions that use dcdbas_pdev are called
+        * before platform_device_unregister
+        */
+       unregister_reboot_notifier(&dcdbas_reboot_nb);
+
+       /*
+        * We have to free the buffer here instead of dcdbas_remove
+        * because only in module exit function we can be sure that
+        * all sysfs attributes belonging to this module have been
+        * released.
+        */
+       if (dcdbas_pdev)
+               smi_data_buf_free();
+       if (eps_buffer)
+               memunmap(eps_buffer);
+       platform_device_unregister(dcdbas_pdev_reg);
+       platform_driver_unregister(&dcdbas_driver);
+}
+
+subsys_initcall_sync(dcdbas_init);
+module_exit(dcdbas_exit);
+
+MODULE_DESCRIPTION(DRIVER_DESCRIPTION " (version " DRIVER_VERSION ")");
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR("Dell Inc.");
+MODULE_LICENSE("GPL");
+/* Any System or BIOS claiming to be by Dell */
+MODULE_ALIAS("dmi:*:[bs]vnD[Ee][Ll][Ll]*:*");
diff --git a/drivers/platform/x86/dcdbas.h b/drivers/platform/x86/dcdbas.h
new file mode 100644 (file)
index 0000000..52729a4
--- /dev/null
@@ -0,0 +1,117 @@
+/*
+ *  dcdbas.h: Definitions for Dell Systems Management Base driver
+ *
+ *  Copyright (C) 1995-2005 Dell Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License v2.0 as published by
+ *  the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ */
+
+#ifndef _DCDBAS_H_
+#define _DCDBAS_H_
+
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+#define MAX_SMI_DATA_BUF_SIZE                  (256 * 1024)
+
+#define HC_ACTION_NONE                         (0)
+#define HC_ACTION_HOST_CONTROL_POWEROFF                BIT(1)
+#define HC_ACTION_HOST_CONTROL_POWERCYCLE      BIT(2)
+
+#define HC_SMITYPE_NONE                                (0)
+#define HC_SMITYPE_TYPE1                       (1)
+#define HC_SMITYPE_TYPE2                       (2)
+#define HC_SMITYPE_TYPE3                       (3)
+
+#define ESM_APM_CMD                            (0x0A0)
+#define ESM_APM_POWER_CYCLE                    (0x10)
+#define ESM_STATUS_CMD_UNSUCCESSFUL            (-1)
+
+#define CMOS_BASE_PORT                         (0x070)
+#define CMOS_PAGE1_INDEX_PORT                  (0)
+#define CMOS_PAGE1_DATA_PORT                   (1)
+#define CMOS_PAGE2_INDEX_PORT_PIIX4            (2)
+#define CMOS_PAGE2_DATA_PORT_PIIX4             (3)
+#define PE1400_APM_CONTROL_PORT                        (0x0B0)
+#define PCAT_APM_CONTROL_PORT                  (0x0B2)
+#define PCAT_APM_STATUS_PORT                   (0x0B3)
+#define PE1300_CMOS_CMD_STRUCT_PTR             (0x38)
+#define PE1400_CMOS_CMD_STRUCT_PTR             (0x70)
+
+#define MAX_SYSMGMT_SHORTCMD_PARMBUF_LEN       (14)
+#define MAX_SYSMGMT_LONGCMD_SGENTRY_NUM                (16)
+
+#define TIMEOUT_USEC_SHORT_SEMA_BLOCKING       (10000)
+#define EXPIRED_TIMER                          (0)
+
+#define SMI_CMD_MAGIC                          (0x534D4931)
+#define SMM_EPS_SIG                            "$SCB"
+
+#define DCDBAS_DEV_ATTR_RW(_name) \
+       DEVICE_ATTR(_name,0600,_name##_show,_name##_store);
+
+#define DCDBAS_DEV_ATTR_RO(_name) \
+       DEVICE_ATTR(_name,0400,_name##_show,NULL);
+
+#define DCDBAS_DEV_ATTR_WO(_name) \
+       DEVICE_ATTR(_name,0200,NULL,_name##_store);
+
+#define DCDBAS_BIN_ATTR_RW(_name) \
+struct bin_attribute bin_attr_##_name = { \
+       .attr =  { .name = __stringify(_name), \
+                  .mode = 0600 }, \
+       .read =  _name##_read, \
+       .write = _name##_write, \
+}
+
+struct smi_cmd {
+       __u32 magic;
+       __u32 ebx;
+       __u32 ecx;
+       __u16 command_address;
+       __u8 command_code;
+       __u8 reserved;
+       __u8 command_buffer[1];
+} __attribute__ ((packed));
+
+struct apm_cmd {
+       __u8 command;
+       __s8 status;
+       __u16 reserved;
+       union {
+               struct {
+                       __u8 parm[MAX_SYSMGMT_SHORTCMD_PARMBUF_LEN];
+               } __attribute__ ((packed)) shortreq;
+
+               struct {
+                       __u16 num_sg_entries;
+                       struct {
+                               __u32 size;
+                               __u64 addr;
+                       } __attribute__ ((packed))
+                           sglist[MAX_SYSMGMT_LONGCMD_SGENTRY_NUM];
+               } __attribute__ ((packed)) longreq;
+       } __attribute__ ((packed)) parameters;
+} __attribute__ ((packed));
+
+int dcdbas_smi_request(struct smi_cmd *smi_cmd);
+
+struct smm_eps_table {
+       char smm_comm_buff_anchor[4];
+       u8 length;
+       u8 checksum;
+       u8 version;
+       u64 smm_comm_buff_addr;
+       u64 num_of_4k_pages;
+} __packed;
+
+#endif /* _DCDBAS_H_ */
+
index 97a90bebc36079f5ff716b8d739bd27928aa442a..ab9b822a6dfe6df42a0ba237f7aa50fb984d39db 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/platform_device.h>
-#include "../../firmware/dcdbas.h"
+#include "dcdbas.h"
 #include "dell-smbios.h"
 
 static int da_command_address;
diff --git a/drivers/platform/x86/dell_rbu.c b/drivers/platform/x86/dell_rbu.c
new file mode 100644 (file)
index 0000000..ccefa84
--- /dev/null
@@ -0,0 +1,753 @@
+/*
+ * dell_rbu.c
+ * Bios Update driver for Dell systems
+ * Author: Dell Inc
+ *         Abhay Salunke <abhay_salunke@dell.com>
+ *
+ * Copyright (C) 2005 Dell Inc.
+ *
+ * Remote BIOS Update (rbu) driver is used for updating DELL BIOS by
+ * creating entries in the /sys file systems on Linux 2.6 and higher
+ * kernels. The driver supports two mechanism to update the BIOS namely
+ * contiguous and packetized. Both these methods still require having some
+ * application to set the CMOS bit indicating the BIOS to update itself
+ * after a reboot.
+ *
+ * Contiguous method:
+ * This driver writes the incoming data in a monolithic image by allocating
+ * contiguous physical pages large enough to accommodate the incoming BIOS
+ * image size.
+ *
+ * Packetized method:
+ * The driver writes the incoming packet image by allocating a new packet
+ * on every time the packet data is written. This driver requires an
+ * application to break the BIOS image in to fixed sized packet chunks.
+ *
+ * See Documentation/dell_rbu.txt for more info.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License v2.0 as published by
+ * the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/blkdev.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/moduleparam.h>
+#include <linux/firmware.h>
+#include <linux/dma-mapping.h>
+#include <asm/set_memory.h>
+
+MODULE_AUTHOR("Abhay Salunke <abhay_salunke@dell.com>");
+MODULE_DESCRIPTION("Driver for updating BIOS image on DELL systems");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("3.2");
+
+#define BIOS_SCAN_LIMIT 0xffffffff     /* BIOS only scans below 4GB */
+#define MAX_IMAGE_LENGTH 16            /* max chars in the image_type string */
+/* Global driver state; all fields are guarded by rbu_data.lock */
+static struct _rbu_data {
+       void *image_update_buffer;      /* monolithic BIOS image buffer */
+       unsigned long image_update_buffer_size;
+       unsigned long bios_image_size;
+       int image_update_ordernum;      /* page order of buffer; -1 when DMA */
+       int dma_alloc;                  /* 1 if buffer from dma_alloc_coherent */
+       spinlock_t lock;
+       unsigned long packet_read_count;        /* bytes already read back */
+       unsigned long num_packets;
+       unsigned long packetsize;       /* user-set packet chunk size */
+       unsigned long imagesize;        /* total size of packetized image */
+       int entry_created;              /* firmware load request outstanding */
+} rbu_data;
+
+static char image_type[MAX_IMAGE_LENGTH + 1] = "mono";
+module_param_string(image_type, image_type, sizeof (image_type), 0);
+MODULE_PARM_DESC(image_type,
+       "BIOS image type. choose- mono or packet or init");
+
+/* BIOS errata: packets below 1MB get overwritten, so allocate above it */
+static unsigned long allocation_floor = 0x100000;
+module_param(allocation_floor, ulong, 0644);
+MODULE_PARM_DESC(allocation_floor,
+    "Minimum address for allocations when using Packet mode");
+
+/* One downloaded packet, linked on packet_data_head.list */
+struct packet_data {
+       struct list_head list;
+       size_t length;          /* payload bytes (last packet may be short) */
+       void *data;
+       int ordernum;           /* page order of data, for free_pages() */
+};
+
+/* Sentinel head node; only its .list member is ever used */
+static struct packet_data packet_data_head;
+
+static struct platform_device *rbu_device;
+static int context;            /* opaque cookie for request_firmware_nowait */
+static dma_addr_t dell_rbu_dmaaddr;
+
+/* Reset the packet list head and all packet-mode counters to empty */
+static void init_packet_head(void)
+{
+       INIT_LIST_HEAD(&packet_data_head.list);
+       rbu_data.packet_read_count = 0;
+       rbu_data.num_packets = 0;
+       rbu_data.packetsize = 0;
+       rbu_data.imagesize = 0;
+}
+
+/*
+ * Allocate a physically-contiguous, uncached buffer above allocation_floor,
+ * copy @data (@length bytes) into it and append it to the packet list.
+ * Called with rbu_data.lock held; the lock is dropped across the sleeping
+ * GFP_KERNEL allocations and re-taken before returning.
+ * Returns 0 on success or a negative errno.
+ */
+static int create_packet(void *data, size_t length)
+{
+       struct packet_data *newpacket;
+       int ordernum = 0;
+       int retval = 0;
+       unsigned int packet_array_size = 0;
+       void **invalid_addr_packet_array = NULL;
+       void *packet_data_temp_buf = NULL;
+       unsigned int idx = 0;
+
+       pr_debug("create_packet: entry \n");
+
+       if (!rbu_data.packetsize) {
+               pr_debug("create_packet: packetsize not specified\n");
+               retval = -EINVAL;
+               goto out_noalloc;
+       }
+
+       /* drop the lock: the allocations below may sleep */
+       spin_unlock(&rbu_data.lock);
+
+       newpacket = kzalloc(sizeof (struct packet_data), GFP_KERNEL);
+
+       if (!newpacket) {
+               printk(KERN_WARNING
+                       "dell_rbu:%s: failed to allocate new "
+                       "packet\n", __func__);
+               retval = -ENOMEM;
+               spin_lock(&rbu_data.lock);
+               goto out_noalloc;
+       }
+
+       ordernum = get_order(length);
+
+       /*
+        * BIOS errata mean we cannot allocate packets below 1MB or they will
+        * be overwritten by BIOS.
+        *
+        * array to temporarily hold packets
+        * that are below the allocation floor
+        *
+        * NOTE: very simplistic because we only need the floor to be at 1MB
+        *       due to BIOS errata. This shouldn't be used for higher floors
+        *       or you will run out of mem trying to allocate the array.
+        */
+       packet_array_size = max(
+                       (unsigned int)(allocation_floor / rbu_data.packetsize),
+                       (unsigned int)1);
+       invalid_addr_packet_array = kcalloc(packet_array_size, sizeof(void *),
+                                               GFP_KERNEL);
+
+       if (!invalid_addr_packet_array) {
+               printk(KERN_WARNING
+                       "dell_rbu:%s: failed to allocate "
+                       "invalid_addr_packet_array \n",
+                       __func__);
+               retval = -ENOMEM;
+               spin_lock(&rbu_data.lock);
+               goto out_alloc_packet;
+       }
+
+       /*
+        * Keep allocating until we get pages above the floor; below-floor
+        * pages are parked in the array so they are not handed back to us.
+        * NOTE(review): idx is never checked against packet_array_size, so
+        * the array could overflow if many low allocations occur -- confirm.
+        */
+       while (!packet_data_temp_buf) {
+               packet_data_temp_buf = (unsigned char *)
+                       __get_free_pages(GFP_KERNEL, ordernum);
+               if (!packet_data_temp_buf) {
+                       printk(KERN_WARNING
+                               "dell_rbu:%s: failed to allocate new "
+                               "packet\n", __func__);
+                       retval = -ENOMEM;
+                       spin_lock(&rbu_data.lock);
+                       goto out_alloc_packet_array;
+               }
+
+               if ((unsigned long)virt_to_phys(packet_data_temp_buf)
+                               < allocation_floor) {
+                       pr_debug("packet 0x%lx below floor at 0x%lx.\n",
+                                       (unsigned long)virt_to_phys(
+                                               packet_data_temp_buf),
+                                       allocation_floor);
+                       invalid_addr_packet_array[idx++] = packet_data_temp_buf;
+                       packet_data_temp_buf = NULL;
+               }
+       }
+       /*
+        * set to uncacheable or it may never get written back before reboot
+        */
+       set_memory_uc((unsigned long)packet_data_temp_buf, 1 << ordernum);
+
+       spin_lock(&rbu_data.lock);
+
+       newpacket->data = packet_data_temp_buf;
+
+       pr_debug("create_packet: newpacket at physical addr %lx\n",
+               (unsigned long)virt_to_phys(newpacket->data));
+
+       /* packets may not have fixed size */
+       newpacket->length = length;
+       newpacket->ordernum = ordernum;
+       ++rbu_data.num_packets;
+
+       /* initialize the newly created packet headers */
+       INIT_LIST_HEAD(&newpacket->list);
+       list_add_tail(&newpacket->list, &packet_data_head.list);
+
+       memcpy(newpacket->data, data, length);
+
+       pr_debug("create_packet: exit \n");
+
+out_alloc_packet_array:
+       /* always free packet array */
+       for (;idx>0;idx--) {
+               pr_debug("freeing unused packet below floor 0x%lx.\n",
+                       (unsigned long)virt_to_phys(
+                               invalid_addr_packet_array[idx-1]));
+               free_pages((unsigned long)invalid_addr_packet_array[idx-1],
+                       ordernum);
+       }
+       kfree(invalid_addr_packet_array);
+
+out_alloc_packet:
+       /* if error, free data */
+       if (retval)
+               kfree(newpacket);
+
+out_noalloc:
+       return retval;
+}
+
+/*
+ * Split the firmware image @data (@length bytes) into packetsize-sized
+ * chunks -- the last chunk may be shorter -- and queue each one via
+ * create_packet().  Called with rbu_data.lock held (create_packet()
+ * expects to drop and re-take it).  Returns 0 or create_packet()'s error.
+ */
+static int packetize_data(const u8 *data, size_t length)
+{
+       int rc = 0;
+       int done = 0;
+       int packet_length;
+       u8 *temp;
+       u8 *end = (u8 *) data + length;
+       pr_debug("packetize_data: data length %zd\n", length);
+       if (!rbu_data.packetsize) {
+               printk(KERN_WARNING
+                       "dell_rbu: packetsize not specified\n");
+               return -EIO;
+       }
+
+       temp = (u8 *) data;
+
+       /* packetize the hunk */
+       while (!done) {
+               if ((temp + rbu_data.packetsize) < end)
+                       packet_length = rbu_data.packetsize;
+               else {
+                       /* this is the last packet */
+                       packet_length = end - temp;
+                       done = 1;
+               }
+
+               if ((rc = create_packet(temp, packet_length)))
+                       return rc;
+
+               pr_debug("%p:%td\n", temp, (end - temp));
+               temp += packet_length;
+       }
+
+       rbu_data.imagesize = length;
+
+       return rc;
+}
+
+/*
+ * Copy the still-unread portion of one packet into @data.  @bytes_read is
+ * how many bytes earlier reads consumed overall; *@list_read_count
+ * accumulates the total packet bytes walked so far.  Copies at most
+ * @length bytes and returns the number of bytes actually copied
+ * (0 if this packet was already fully read).
+ */
+static int do_packet_read(char *data, struct list_head *ptemp_list,
+       int length, int bytes_read, int *list_read_count)
+{
+       void *ptemp_buf;
+       struct packet_data *newpacket = NULL;
+       int bytes_copied = 0;
+       int j = 0;
+
+       newpacket = list_entry(ptemp_list, struct packet_data, list);
+       *list_read_count += newpacket->length;
+
+       if (*list_read_count > bytes_read) {
+               /* point to the start of unread data */
+               j = newpacket->length - (*list_read_count - bytes_read);
+               /* point to the offset in the packet buffer */
+               ptemp_buf = (u8 *) newpacket->data + j;
+               /*
+                * check if there is enough room in
+                * the incoming buffer
+                */
+               if (length > (*list_read_count - bytes_read))
+                       /*
+                        * copy whatever is there in this
+                        * packet and move on
+                        */
+                       bytes_copied = (*list_read_count - bytes_read);
+               else
+                       /* copy the remaining */
+                       bytes_copied = length;
+               memcpy(data, ptemp_buf, bytes_copied);
+       }
+       return bytes_copied;
+}
+
+/*
+ * Copy up to *pread_length bytes of packet data into @data, resuming
+ * where the previous read left off (rbu_data.packet_read_count).  On
+ * return *pread_length holds the byte count actually copied.  Called
+ * with rbu_data.lock held.
+ * NOTE(review): the loop guard uses list_empty() on individual nodes
+ * rather than testing for a return to the list head; verify termination
+ * when the requested length exceeds the total queued data.
+ */
+static int packet_read_list(char *data, size_t * pread_length)
+{
+       struct list_head *ptemp_list;
+       int temp_count = 0;
+       int bytes_copied = 0;
+       int bytes_read = 0;
+       int remaining_bytes = 0;
+       char *pdest = data;
+
+       /* check if we have any packets */
+       if (0 == rbu_data.num_packets)
+               return -ENOMEM;
+
+       remaining_bytes = *pread_length;
+       bytes_read = rbu_data.packet_read_count;
+
+       ptemp_list = (&packet_data_head.list)->next;
+       while (!list_empty(ptemp_list)) {
+               bytes_copied = do_packet_read(pdest, ptemp_list,
+                       remaining_bytes, bytes_read, &temp_count);
+               remaining_bytes -= bytes_copied;
+               bytes_read += bytes_copied;
+               pdest += bytes_copied;
+               /*
+                * check if we reached end of buffer before reaching the
+                * last packet
+                */
+               if (remaining_bytes == 0)
+                       break;
+
+               ptemp_list = ptemp_list->next;
+       }
+       /*finally set the bytes read */
+       *pread_length = bytes_read - rbu_data.packet_read_count;
+       rbu_data.packet_read_count = bytes_read;
+       return 0;
+}
+
+/*
+ * Delete and free every queued packet: zero each buffer first (so no
+ * stale BIOS image data lingers), restore write-back caching, then free
+ * the pages.  Called with rbu_data.lock held.
+ */
+static void packet_empty_list(void)
+{
+       struct list_head *ptemp_list;
+       struct list_head *pnext_list;
+       struct packet_data *newpacket;
+
+       ptemp_list = (&packet_data_head.list)->next;
+       while (!list_empty(ptemp_list)) {
+               newpacket =
+                       list_entry(ptemp_list, struct packet_data, list);
+               pnext_list = ptemp_list->next;
+               list_del(ptemp_list);
+               ptemp_list = pnext_list;
+               /*
+                * zero out the RBU packet memory before freeing
+                * to make sure there are no stale RBU packets left in memory
+                */
+               memset(newpacket->data, 0, rbu_data.packetsize);
+               set_memory_wb((unsigned long)newpacket->data,
+                       1 << newpacket->ordernum);
+               free_pages((unsigned long) newpacket->data,
+                       newpacket->ordernum);
+               kfree(newpacket);
+       }
+       rbu_data.packet_read_count = 0;
+       rbu_data.num_packets = 0;
+       rbu_data.imagesize = 0;
+}
+
+/*
+ * img_update_free: Frees the buffer allocated for storing BIOS image
+ * Always called with lock held and returned with lock held
+ */
+static void img_update_free(void)
+{
+       if (!rbu_data.image_update_buffer)
+               return;
+       /*
+        * zero out this buffer before freeing it to get rid of any stale
+        * BIOS image copied in memory.
+        */
+       memset(rbu_data.image_update_buffer, 0,
+               rbu_data.image_update_buffer_size);
+       /*
+        * NOTE(review): a NULL struct device is passed to
+        * dma_free_coherent(); newer kernels require a real device here --
+        * confirm against the DMA API of the target kernel.
+        */
+       if (rbu_data.dma_alloc == 1)
+               dma_free_coherent(NULL, rbu_data.bios_image_size,
+                       rbu_data.image_update_buffer, dell_rbu_dmaaddr);
+       else
+               free_pages((unsigned long) rbu_data.image_update_buffer,
+                       rbu_data.image_update_ordernum);
+
+       /*
+        * Re-initialize the rbu_data variables after a free
+        */
+       rbu_data.image_update_ordernum = -1;
+       rbu_data.image_update_buffer = NULL;
+       rbu_data.image_update_buffer_size = 0;
+       rbu_data.bios_image_size = 0;
+       rbu_data.dma_alloc = 0;
+}
+
+/*
+ * img_update_realloc: This function allocates the contiguous pages to
+ * accommodate the requested size of data. The memory address and size
+ * values are stored globally and on every call to this function the new
+ * size is checked to see if more data is required than the existing size.
+ * If true the previous memory is freed and new allocation is done to
+ * accommodate the new size. If the incoming size is less than the
+ * already allocated size, then that memory is reused. This function is
+ * called with lock held and returns with lock held.
+ */
+static int img_update_realloc(unsigned long size)
+{
+       unsigned char *image_update_buffer = NULL;
+       /* NOTE(review): rc holds -ENOMEM but is declared unsigned long;
+        * it is returned through an int -- should simply be int. */
+       unsigned long rc;
+       unsigned long img_buf_phys_addr;
+       int ordernum;
+       int dma_alloc = 0;
+
+       /*
+        * check if the buffer of sufficient size has been
+        * already allocated
+        */
+       if (rbu_data.image_update_buffer_size >= size) {
+               /*
+                * check for corruption
+                */
+               if ((size != 0) && (rbu_data.image_update_buffer == NULL)) {
+                       printk(KERN_ERR "dell_rbu:%s: corruption "
+                               "check failed\n", __func__);
+                       return -EINVAL;
+               }
+               /*
+                * we have a valid pre-allocated buffer with
+                * sufficient size
+                */
+               return 0;
+       }
+
+       /*
+        * free any previously allocated buffer
+        */
+       img_update_free();
+
+       /* drop the lock while performing sleeping allocations */
+       spin_unlock(&rbu_data.lock);
+
+       ordernum = get_order(size);
+       /*
+        * NOTE(review): the __get_free_pages() result is not checked for
+        * NULL before virt_to_phys() below -- confirm this is intended.
+        */
+       image_update_buffer =
+               (unsigned char *) __get_free_pages(GFP_KERNEL, ordernum);
+
+       img_buf_phys_addr =
+               (unsigned long) virt_to_phys(image_update_buffer);
+
+       /* BIOS cannot scan above 4GB; retry as a 32-bit DMA allocation.
+        * NOTE(review): dma_alloc_coherent() is given a NULL device --
+        * verify against the DMA API of the target kernel. */
+       if (img_buf_phys_addr > BIOS_SCAN_LIMIT) {
+               free_pages((unsigned long) image_update_buffer, ordernum);
+               ordernum = -1;
+               image_update_buffer = dma_alloc_coherent(NULL, size,
+                       &dell_rbu_dmaaddr, GFP_KERNEL);
+               dma_alloc = 1;
+       }
+
+       spin_lock(&rbu_data.lock);
+
+       if (image_update_buffer != NULL) {
+               rbu_data.image_update_buffer = image_update_buffer;
+               rbu_data.image_update_buffer_size = size;
+               rbu_data.bios_image_size =
+                       rbu_data.image_update_buffer_size;
+               rbu_data.image_update_ordernum = ordernum;
+               rbu_data.dma_alloc = dma_alloc;
+               rc = 0;
+       } else {
+               pr_debug("Not enough memory for image update:"
+                       "size = %ld\n", size);
+               rc = -ENOMEM;
+       }
+
+       return rc;
+}
+
+/*
+ * Packet-mode read helper for the sysfs "data" attribute: copy up to
+ * @count bytes starting at @pos into @buffer.  Returns the number of
+ * bytes copied, 0 past end-of-image, or a negative errno.  Called with
+ * rbu_data.lock held.
+ */
+static ssize_t read_packet_data(char *buffer, loff_t pos, size_t count)
+{
+       int retval;
+       size_t bytes_left;
+       size_t data_length;
+       char *ptempBuf = buffer;
+
+       /* check to see if we have something to return */
+       if (rbu_data.num_packets == 0) {
+               pr_debug("read_packet_data: no packets written\n");
+               retval = -ENOMEM;
+               goto read_rbu_data_exit;
+       }
+
+       if (pos > rbu_data.imagesize) {
+               retval = 0;
+               printk(KERN_WARNING "dell_rbu:read_packet_data: "
+                       "data underrun\n");
+               goto read_rbu_data_exit;
+       }
+
+       bytes_left = rbu_data.imagesize - pos;
+       data_length = min(bytes_left, count);
+
+       if ((retval = packet_read_list(ptempBuf, &data_length)) < 0)
+               goto read_rbu_data_exit;
+
+       if ((pos + count) > rbu_data.imagesize) {
+               rbu_data.packet_read_count = 0;
+               /* this was the last copy */
+               retval = bytes_left;
+       } else
+               retval = count;
+
+      read_rbu_data_exit:
+       return retval;
+}
+
+/*
+ * Mono-mode read helper for the sysfs "data" attribute: copy from the
+ * single contiguous image buffer.  Called with rbu_data.lock held.
+ */
+static ssize_t read_rbu_mono_data(char *buffer, loff_t pos, size_t count)
+{
+       /* check to see if we have something to return */
+       if ((rbu_data.image_update_buffer == NULL) ||
+               (rbu_data.bios_image_size == 0)) {
+               pr_debug("read_rbu_data_mono: image_update_buffer %p ,"
+                       "bios_image_size %lu\n",
+                       rbu_data.image_update_buffer,
+                       rbu_data.bios_image_size);
+               return -ENOMEM;
+       }
+
+       return memory_read_from_buffer(buffer, count, &pos,
+                       rbu_data.image_update_buffer, rbu_data.bios_image_size);
+}
+
+/*
+ * sysfs "data" read entry point: dispatch to the mono or packet reader
+ * according to image_type, under rbu_data.lock.
+ */
+static ssize_t read_rbu_data(struct file *filp, struct kobject *kobj,
+                            struct bin_attribute *bin_attr,
+                            char *buffer, loff_t pos, size_t count)
+{
+       ssize_t ret_count = 0;
+
+       spin_lock(&rbu_data.lock);
+
+       if (!strcmp(image_type, "mono"))
+               ret_count = read_rbu_mono_data(buffer, pos, count);
+       else if (!strcmp(image_type, "packet"))
+               ret_count = read_packet_data(buffer, pos, count);
+       else
+               pr_debug("read_rbu_data: invalid image type specified\n");
+
+       spin_unlock(&rbu_data.lock);
+       return ret_count;
+}
+
+/*
+ * Completion callback for request_firmware_nowait(): copy the received
+ * image into the mono buffer or packetize it, depending on image_type.
+ * A NULL @fw (load aborted) returns early without release_firmware().
+ */
+static void callbackfn_rbu(const struct firmware *fw, void *context)
+{
+       rbu_data.entry_created = 0;
+
+       if (!fw)
+               return;
+
+       if (!fw->size)
+               goto out;
+
+       spin_lock(&rbu_data.lock);
+       if (!strcmp(image_type, "mono")) {
+               if (!img_update_realloc(fw->size))
+                       memcpy(rbu_data.image_update_buffer,
+                               fw->data, fw->size);
+       } else if (!strcmp(image_type, "packet")) {
+               /*
+                * we need to free previous packets if a
+                * new hunk of packets needs to be downloaded
+                */
+               packet_empty_list();
+               if (packetize_data(fw->data, fw->size))
+                       /* In case something goes wrong when we are
+                        * in middle of packetizing the data, we
+                        * need to free up whatever packets might
+                        * have been created before we quit.
+                        */
+                       packet_empty_list();
+       } else
+               pr_debug("invalid image type specified.\n");
+       spin_unlock(&rbu_data.lock);
+ out:
+       release_firmware(fw);
+}
+
+/* sysfs "image_type" read: report the current mode string ("mono" etc.) */
+static ssize_t read_rbu_image_type(struct file *filp, struct kobject *kobj,
+                                  struct bin_attribute *bin_attr,
+                                  char *buffer, loff_t pos, size_t count)
+{
+       int size = 0;
+       if (!pos)
+               size = scnprintf(buffer, count, "%s\n", image_type);
+       return size;
+}
+
+/*
+ * sysfs "image_type" write: switch between "mono" and "packet" modes, or
+ * re-request the firmware entry with "init".  Any valid write also frees
+ * all previously buffered data (packets and mono image).
+ */
+static ssize_t write_rbu_image_type(struct file *filp, struct kobject *kobj,
+                                   struct bin_attribute *bin_attr,
+                                   char *buffer, loff_t pos, size_t count)
+{
+       int rc = count;
+       int req_firm_rc = 0;
+       int i;
+       spin_lock(&rbu_data.lock);
+       /*
+        * Find the first newline or space
+        */
+       for (i = 0; i < count; ++i)
+               if (buffer[i] == '\n' || buffer[i] == ' ') {
+                       buffer[i] = '\0';
+                       break;
+               }
+       /* NOTE(review): writes one byte past @count; presumably safe for a
+        * PAGE_SIZE sysfs buffer with count < PAGE_SIZE -- confirm. */
+       if (i == count)
+               buffer[count] = '\0';
+
+       if (strstr(buffer, "mono"))
+               strcpy(image_type, "mono");
+       else if (strstr(buffer, "packet"))
+               strcpy(image_type, "packet");
+       else if (strstr(buffer, "init")) {
+               /*
+                * If due to the user error the driver gets in a bad
+                * state where even though it is loaded , the
+                * /sys/class/firmware/dell_rbu entries are missing.
+                * to cover this situation the user can recreate entries
+                * by writing init to image_type.
+                */
+               if (!rbu_data.entry_created) {
+                       spin_unlock(&rbu_data.lock);
+                       req_firm_rc = request_firmware_nowait(THIS_MODULE,
+                               FW_ACTION_NOHOTPLUG, "dell_rbu",
+                               &rbu_device->dev, GFP_KERNEL, &context,
+                               callbackfn_rbu);
+                       if (req_firm_rc) {
+                               /* NOTE(review): this prints rc (still equal
+                                * to count), not req_firm_rc -- likely the
+                                * wrong variable in the message. */
+                               printk(KERN_ERR
+                                       "dell_rbu:%s request_firmware_nowait"
+                                       " failed %d\n", __func__, rc);
+                               rc = -EIO;
+                       } else
+                               rbu_data.entry_created = 1;
+
+                       spin_lock(&rbu_data.lock);
+               }
+       } else {
+               printk(KERN_WARNING "dell_rbu: image_type is invalid\n");
+               spin_unlock(&rbu_data.lock);
+               return -EINVAL;
+       }
+
+       /* we must free all previous allocations */
+       packet_empty_list();
+       img_update_free();
+       spin_unlock(&rbu_data.lock);
+
+       return rc;
+}
+
+/* sysfs "packet_size" read: report the current packet size under the lock */
+static ssize_t read_rbu_packet_size(struct file *filp, struct kobject *kobj,
+                                   struct bin_attribute *bin_attr,
+                                   char *buffer, loff_t pos, size_t count)
+{
+       int size = 0;
+       if (!pos) {
+               spin_lock(&rbu_data.lock);
+               size = scnprintf(buffer, count, "%lu\n", rbu_data.packetsize);
+               spin_unlock(&rbu_data.lock);
+       }
+       return size;
+}
+
+/*
+ * sysfs "packet_size" write: set a new packet size (< 4GB) and drop any
+ * packets queued with the old size.
+ * NOTE(review): the sscanf() return value is unchecked, so temp may be
+ * used uninitialized when the input does not parse -- confirm intended.
+ */
+static ssize_t write_rbu_packet_size(struct file *filp, struct kobject *kobj,
+                                    struct bin_attribute *bin_attr,
+                                    char *buffer, loff_t pos, size_t count)
+{
+       unsigned long temp;
+       spin_lock(&rbu_data.lock);
+       packet_empty_list();
+       sscanf(buffer, "%lu", &temp);
+       if (temp < 0xffffffff)
+               rbu_data.packetsize = temp;
+
+       spin_unlock(&rbu_data.lock);
+       return count;
+}
+
+/* Read-only image data exposed at /sys/devices/platform/dell_rbu/data */
+static struct bin_attribute rbu_data_attr = {
+       .attr = {.name = "data", .mode = 0444},
+       .read = read_rbu_data,
+};
+
+/* Mode selector: "mono", "packet" or "init" */
+static struct bin_attribute rbu_image_type_attr = {
+       .attr = {.name = "image_type", .mode = 0644},
+       .read = read_rbu_image_type,
+       .write = write_rbu_image_type,
+};
+
+/* Packet chunk size used when image_type is "packet" */
+static struct bin_attribute rbu_packet_size_attr = {
+       .attr = {.name = "packet_size", .mode = 0644},
+       .read = read_rbu_packet_size,
+       .write = write_rbu_packet_size,
+};
+
+/*
+ * Module init: register the dell_rbu platform device and its three sysfs
+ * binary attributes, unwinding in reverse order on failure.  The firmware
+ * request itself is made later, when the user writes "init" to image_type.
+ */
+static int __init dcdrbu_init(void)
+{
+       int rc;
+       spin_lock_init(&rbu_data.lock);
+
+       init_packet_head();
+       rbu_device = platform_device_register_simple("dell_rbu", -1, NULL, 0);
+       if (IS_ERR(rbu_device)) {
+               printk(KERN_ERR
+                       "dell_rbu:%s:platform_device_register_simple "
+                       "failed\n", __func__);
+               return PTR_ERR(rbu_device);
+       }
+
+       rc = sysfs_create_bin_file(&rbu_device->dev.kobj, &rbu_data_attr);
+       if (rc)
+               goto out_devreg;
+       rc = sysfs_create_bin_file(&rbu_device->dev.kobj, &rbu_image_type_attr);
+       if (rc)
+               goto out_data;
+       rc = sysfs_create_bin_file(&rbu_device->dev.kobj,
+               &rbu_packet_size_attr);
+       if (rc)
+               goto out_imtype;
+
+       rbu_data.entry_created = 0;
+       return 0;
+
+out_imtype:
+       sysfs_remove_bin_file(&rbu_device->dev.kobj, &rbu_image_type_attr);
+out_data:
+       sysfs_remove_bin_file(&rbu_device->dev.kobj, &rbu_data_attr);
+out_devreg:
+       platform_device_unregister(rbu_device);
+       return rc;
+}
+
+/* Module exit: free all buffered image data and unregister the device */
+static __exit void dcdrbu_exit(void)
+{
+       spin_lock(&rbu_data.lock);
+       packet_empty_list();
+       img_update_free();
+       spin_unlock(&rbu_data.lock);
+       platform_device_unregister(rbu_device);
+}
+
+module_exit(dcdrbu_exit);
+module_init(dcdrbu_init);
+
+/* vim:noet:ts=8:sw=8
+*/
index d4f1259ff5a233bc22bb0700ddb354ace4b40e58..b6489cba29853e1919cd21c36e7fd19f3c720748 100644 (file)
@@ -212,7 +212,7 @@ static int read_ec_data(acpi_handle handle, int cmd, unsigned long *data)
                        return 0;
                }
        }
-       pr_err("timeout in read_ec_cmd\n");
+       pr_err("timeout in %s\n", __func__);
        return -1;
 }
 
@@ -1146,6 +1146,13 @@ static const struct dmi_system_id no_hw_rfkill_list[] = {
                        DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Y520-15IKBM"),
                },
        },
+       {
+               .ident = "Lenovo Legion Y530-15ICH",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Legion Y530-15ICH"),
+               },
+       },
        {
                .ident = "Lenovo Legion Y720-15IKB",
                .matches = {
index 6cf9b7fa5bf0486fcf7e9d0b3ed805cfe07ca19d..e28bcf61b12698c6bf88668fe542e49cca463d32 100644 (file)
@@ -1,19 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  *  Intel HID event & 5 button array driver
  *
  *  Copyright (C) 2015 Alex Hung <alex.hung@canonical.com>
  *  Copyright (C) 2015 Andrew Lutomirski <luto@kernel.org>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
  */
 
 #include <linux/acpi.h>
index 7344d841f4d98d17810b5f6bdbee8965415af6e5..3b81cb896fedff37c231ed62de5c9d859a2df3ac 100644 (file)
@@ -1,26 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  *  Copyright 2013 Matthew Garrett <mjg59@srcf.ucam.org>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
-
-#include <linux/init.h>
+#include <linux/acpi.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/acpi.h>
 
 MODULE_LICENSE("GPL");
 
@@ -53,12 +38,10 @@ static ssize_t irst_store_wakeup_events(struct device *dev,
        acpi = to_acpi_device(dev);
 
        error = kstrtoul(buf, 0, &value);
-
        if (error)
                return error;
 
        status = acpi_execute_simple_method(acpi->handle, "SFFS", value);
-
        if (ACPI_FAILURE(status))
                return -EINVAL;
 
@@ -99,12 +82,10 @@ static ssize_t irst_store_wakeup_time(struct device *dev,
        acpi = to_acpi_device(dev);
 
        error = kstrtoul(buf, 0, &value);
-
        if (error)
                return error;
 
        status = acpi_execute_simple_method(acpi->handle, "SFTV", value);
-
        if (ACPI_FAILURE(status))
                return -EINVAL;
 
index bbe4c06c769f66eabf8ed956cbb3361ef8407220..64c2dc93472f474869b98e418ac1a67bb74d5559 100644 (file)
@@ -1,25 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  *  Copyright 2013 Matthew Garrett <mjg59@srcf.ucam.org>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
-
-#include <linux/init.h>
-#include <linux/module.h>
 #include <linux/acpi.h>
+#include <linux/module.h>
 
 MODULE_LICENSE("GPL");
 
@@ -44,6 +29,7 @@ static const struct acpi_device_id smartconnect_ids[] = {
        {"INT33A0", 0},
        {"", 0}
 };
+MODULE_DEVICE_TABLE(acpi, smartconnect_ids);
 
 static struct acpi_driver smartconnect_driver = {
        .owner = THIS_MODULE,
@@ -56,5 +42,3 @@ static struct acpi_driver smartconnect_driver = {
 };
 
 module_acpi_driver(smartconnect_driver);
-
-MODULE_DEVICE_TABLE(acpi, smartconnect_ids);
index c2257bd06f1863d848fc384b2ac5577294bef8f5..9ded8e2af312f23293a5a6a8a56a3c7b8d499c3e 100644 (file)
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * WMI Thunderbolt driver
  *
  * Copyright (C) 2017 Dell Inc. All Rights Reserved.
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License version 2 as published
- *  by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -38,12 +30,16 @@ static ssize_t force_power_store(struct device *dev,
        input.length = sizeof(u8);
        input.pointer = &mode;
        mode = hex_to_bin(buf[0]);
+       dev_dbg(dev, "force_power: storing %#x\n", mode);
        if (mode == 0 || mode == 1) {
                status = wmi_evaluate_method(INTEL_WMI_THUNDERBOLT_GUID, 0, 1,
                                             &input, NULL);
-               if (ACPI_FAILURE(status))
+               if (ACPI_FAILURE(status)) {
+                       dev_dbg(dev, "force_power: failed to evaluate ACPI method\n");
                        return -ENODEV;
+               }
        } else {
+               dev_dbg(dev, "force_power: unsupported mode\n");
                return -EINVAL;
        }
        return count;
@@ -95,4 +91,4 @@ module_wmi_driver(intel_wmi_thunderbolt_driver);
 MODULE_ALIAS("wmi:" INTEL_WMI_THUNDERBOLT_GUID);
 MODULE_AUTHOR("Mario Limonciello <mario.limonciello@dell.com>");
 MODULE_DESCRIPTION("Intel WMI Thunderbolt force power driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/x86/intel_atomisp2_pm.c b/drivers/platform/x86/intel_atomisp2_pm.c
new file mode 100644 (file)
index 0000000..9371603
--- /dev/null
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Dummy driver for Intel's Image Signal Processor found on Bay and Cherry
+ * Trail devices. The sole purpose of this driver is to allow the ISP to
+ * be put in D3.
+ *
+ * Copyright (C) 2018 Hans de Goede <hdegoede@redhat.com>
+ *
+ * Based on various non upstream patches for ISP support:
+ * Copyright (C) 2010-2017 Intel Corporation. All rights reserved.
+ * Copyright (c) 2010 Silicon Hive www.siliconhive.com.
+ */
+
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <asm/iosf_mbi.h>
+
+/* PCI configuration regs */
+#define PCI_INTERRUPT_CTRL             0x9c
+
+#define PCI_CSI_CONTROL                        0xe8
+#define PCI_CSI_CONTROL_PORTS_OFF_MASK 0x7
+
+/* IOSF BT_MBI_UNIT_PMC regs */
+#define ISPSSPM0                       0x39
+#define ISPSSPM0_ISPSSC_OFFSET         0
+#define ISPSSPM0_ISPSSC_MASK           0x00000003
+#define ISPSSPM0_ISPSSS_OFFSET         24
+#define ISPSSPM0_ISPSSS_MASK           0x03000000
+#define ISPSSPM0_IUNIT_POWER_ON                0x0
+#define ISPSSPM0_IUNIT_POWER_OFF       0x3
+
+static int isp_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+       unsigned long timeout;
+       u32 val;
+
+       pci_write_config_dword(dev, PCI_INTERRUPT_CTRL, 0);
+
+       /*
+        * MRFLD IUNIT DPHY is located in an always-power-on island
+        * MRFLD HW design need all CSI ports are disabled before
+        * powering down the IUNIT.
+        */
+       pci_read_config_dword(dev, PCI_CSI_CONTROL, &val);
+       val |= PCI_CSI_CONTROL_PORTS_OFF_MASK;
+       pci_write_config_dword(dev, PCI_CSI_CONTROL, val);
+
+       /* Write 0x3 to ISPSSPM0 bit[1:0] to power off the IUNIT */
+       iosf_mbi_modify(BT_MBI_UNIT_PMC, MBI_REG_READ, ISPSSPM0,
+                       ISPSSPM0_IUNIT_POWER_OFF, ISPSSPM0_ISPSSC_MASK);
+
+       /*
+        * There should be no IUNIT access while power-down is
+        * in progress HW sighting: 4567865
+        * Wait up to 50 ms for the IUNIT to shut down.
+        */
+       timeout = jiffies + msecs_to_jiffies(50);
+       while (1) {
+               /* Wait until ISPSSPM0 bit[25:24] shows 0x3 */
+               iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ, ISPSSPM0, &val);
+               val = (val & ISPSSPM0_ISPSSS_MASK) >> ISPSSPM0_ISPSSS_OFFSET;
+               if (val == ISPSSPM0_IUNIT_POWER_OFF)
+                       break;
+
+               if (time_after(jiffies, timeout)) {
+                       dev_err(&dev->dev, "IUNIT power-off timeout.\n");
+                       return -EBUSY;
+               }
+               usleep_range(1000, 2000);
+       }
+
+       pm_runtime_allow(&dev->dev);
+       pm_runtime_put_sync_suspend(&dev->dev);
+
+       return 0;
+}
+
+static void isp_remove(struct pci_dev *dev)
+{
+       pm_runtime_get_sync(&dev->dev);
+       pm_runtime_forbid(&dev->dev);
+}
+
+static int isp_pci_suspend(struct device *dev)
+{
+       return 0;
+}
+
+static int isp_pci_resume(struct device *dev)
+{
+       return 0;
+}
+
+static UNIVERSAL_DEV_PM_OPS(isp_pm_ops, isp_pci_suspend,
+                           isp_pci_resume, NULL);
+
+static const struct pci_device_id isp_id_table[] = {
+       { PCI_VDEVICE(INTEL, 0x22b8), },
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, isp_id_table);
+
+static struct pci_driver isp_pci_driver = {
+       .name = "intel_atomisp2_pm",
+       .id_table = isp_id_table,
+       .probe = isp_probe,
+       .remove = isp_remove,
+       .driver.pm = &isp_pm_ops,
+};
+
+module_pci_driver(isp_pci_driver);
+
+MODULE_DESCRIPTION("Intel AtomISP2 dummy / power-management drv (for suspend)");
+MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>");
+MODULE_LICENSE("GPL v2");
index 227943a20212cca12188cc18730bc42d6807da4e..951c105bafc1b8b1fc276ee04274690fd5f15f30 100644 (file)
@@ -1,21 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * intel_bxtwc_tmu.c - Intel BXT Whiskey Cove PMIC TMU driver
+ * Intel BXT Whiskey Cove PMIC TMU driver
  *
  * Copyright (C) 2016 Intel Corporation. All rights reserved.
  *
  * This driver adds TMU (Time Management Unit) support for Intel BXT platform.
  * It enables the alarm wake-up functionality in the TMU unit of Whiskey Cove
  * PMIC.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 
 #include <linux/module.h>
index f40b1c1921064b734614705c72dc212700f979fc..464fe93657b53e03268eabb2f41f5cdd0583d96e 100644 (file)
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Intel Cherry Trail ACPI INT33FE pseudo device driver
  *
  * Copyright (C) 2017 Hans de Goede <hdegoede@redhat.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Some Intel Cherry Trail based device which ship with Windows 10, have
  * this weird INT33FE ACPI device with a CRS table with 4 I2cSerialBusV2
  * resources, for 4 different chips attached to various i2c busses:
@@ -257,4 +254,4 @@ module_platform_driver(cht_int33fe_driver);
 
 MODULE_DESCRIPTION("Intel Cherry Trail ACPI INT33FE pseudo device driver");
 MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
index 38b8e7cfe88c06a68fa351b7048247324fc89e31..0df2e82dd24924bd6d92ff74f2434b4eb1f30d64 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Power-button driver for Dollar Cove TI PMIC
  * Copyright (C) 2014 Intel Corp
index e89ad4964dc139daec390560bfcecb145cdd5505..4b8f7305fc8a773b8a0e06291b69672ef559a7ab 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Intel INT0002 "Virtual GPIO" driver
  *
@@ -9,10 +10,6 @@
  *
  * Author: Dyut Kumar Sil <dyut.k.sil@intel.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Some peripherals on Bay Trail and Cherry Trail platforms signal a Power
  * Management Event (PME) to the Power Management Controller (PMC) to wakeup
  * the system. When this happens software needs to clear the PME bus 0 status
 #define ICPU(model)    { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, }
 
 static const struct x86_cpu_id int0002_cpu_ids[] = {
-/*
- * Limit ourselves to Cherry Trail for now, until testing shows we
- * need to handle the INT0002 device on Baytrail too.
- *     ICPU(INTEL_FAM6_ATOM_SILVERMONT),        * Valleyview, Bay Trail *
- */
+       ICPU(INTEL_FAM6_ATOM_SILVERMONT),       /* Valleyview, Bay Trail  */
        ICPU(INTEL_FAM6_ATOM_AIRMONT),          /* Braswell, Cherry Trail */
        {}
 };
@@ -110,6 +103,21 @@ static void int0002_irq_mask(struct irq_data *data)
        outl(gpe_en_reg, GPE0A_EN_PORT);
 }
 
+static int int0002_irq_set_wake(struct irq_data *data, unsigned int on)
+{
+       struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
+       struct platform_device *pdev = to_platform_device(chip->parent);
+       int irq = platform_get_irq(pdev, 0);
+
+       /* Propagate to parent irq */
+       if (on)
+               enable_irq_wake(irq);
+       else
+               disable_irq_wake(irq);
+
+       return 0;
+}
+
 static irqreturn_t int0002_irq(int irq, void *data)
 {
        struct gpio_chip *chip = data;
@@ -132,6 +140,7 @@ static struct irq_chip int0002_irqchip = {
        .irq_ack                = int0002_irq_ack,
        .irq_mask               = int0002_irq_mask,
        .irq_unmask             = int0002_irq_unmask,
+       .irq_set_wake           = int0002_irq_set_wake,
 };
 
 static int int0002_probe(struct platform_device *pdev)
@@ -216,4 +225,4 @@ module_platform_driver(int0002_driver);
 
 MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>");
 MODULE_DESCRIPTION("Intel INT0002 Virtual GPIO driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
index c5ece7ef08c6df36b2d1ccff788f99c3cb408abe..225638a1b09e45d762668615cc9de93a0db100ba 100644 (file)
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2009-2010 Intel Corporation
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
  * Authors:
  *     Jesse Barnes <jbarnes@virtuousgeek.org>
  */
@@ -1697,6 +1686,6 @@ static struct pci_driver ips_pci_driver = {
 
 module_pci_driver(ips_pci_driver);
 
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Jesse Barnes <jbarnes@virtuousgeek.org>");
 MODULE_DESCRIPTION("Intelligent Power Sharing Driver");
index 60f4e3ddbe9f5b3f84e02030a57b98793141e615..512ad234ad0d67fccdb05d241d37fd887da164d5 100644 (file)
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2010 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
  */
 
 void ips_link_to_i915_driver(void);
index ef9b0af8cdd36a6adc68fb2f3b835993bfa46ebe..77eb8709c931c03dbeda565b71c65e8d6f1d564e 100644 (file)
@@ -1,25 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- *  intel_menlow.c - Intel menlow Driver for thermal management extension
+ *  Intel menlow Driver for thermal management extension
  *
  *  Copyright (C) 2008 Intel Corp
  *  Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com>
  *  Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com>
- *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; version 2 of the License.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *
  *  This driver creates the sys I/F for programming the sensors.
  *  It also implements the driver for intel menlow memory controller (hardware
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/acpi.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/types.h>
 #include <linux/pci.h>
 #include <linux/pm.h>
+#include <linux/slab.h>
 #include <linux/thermal.h>
-#include <linux/acpi.h>
+#include <linux/types.h>
 
 MODULE_AUTHOR("Thomas Sujith");
 MODULE_AUTHOR("Zhang Rui");
 MODULE_DESCRIPTION("Intel Menlow platform specific driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 
 /*
  * Memory controller device control
index 5ad44204a9c3c997bf237aacfb1bbb99ab292ef7..292bace83f1e3d95a8dae813370b1fc6610bc0df 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Power button driver for Intel MID platforms.
  *
@@ -5,18 +6,8 @@
  *
  * Author: Hong Liu <hong.liu@intel.com>
  * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
  */
 
-#include <linux/init.h>
 #include <linux/input.h>
 #include <linux/interrupt.h>
 #include <linux/mfd/intel_msic.h>
@@ -121,12 +112,9 @@ static const struct mid_pb_ddata mrfld_ddata = {
        .setup  = mrfld_setup,
 };
 
-#define ICPU(model, ddata)     \
-       { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (kernel_ulong_t)&ddata }
-
 static const struct x86_cpu_id mid_pb_cpu_ids[] = {
-       ICPU(INTEL_FAM6_ATOM_SALTWELL_MID,              mfld_ddata),
-       ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID,    mrfld_ddata),
+       INTEL_CPU_FAM6(ATOM_SALTWELL_MID,       mfld_ddata),
+       INTEL_CPU_FAM6(ATOM_SILVERMONT_MID,     mrfld_ddata),
        {}
 };
 
index 008a76903cbfb62221ac4fd4bb8aaa7b46c56fb1..f402e2e74a38392e9abf1eae8debc3630dcc828c 100644 (file)
@@ -1,39 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * intel_mid_thermal.c - Intel MID platform thermal driver
+ * Intel MID platform thermal driver
  *
  * Copyright (C) 2011 Intel Corporation
  *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.        See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  * Author: Durgadoss R <durgadoss.r@intel.com>
  */
 
 #define pr_fmt(fmt) "intel_mid_thermal: " fmt
 
-#include <linux/module.h>
-#include <linux/init.h>
+#include <linux/device.h>
 #include <linux/err.h>
+#include <linux/mfd/intel_msic.h>
+#include <linux/module.h>
 #include <linux/param.h>
-#include <linux/device.h>
 #include <linux/platform_device.h>
-#include <linux/slab.h>
 #include <linux/pm.h>
+#include <linux/slab.h>
 #include <linux/thermal.h>
-#include <linux/mfd/intel_msic.h>
 
 /* Number of thermal sensors */
 #define MSIC_THERMAL_SENSORS   4
@@ -567,4 +551,4 @@ module_platform_driver(mid_thermal_driver);
 
 MODULE_AUTHOR("Durgadoss R <durgadoss.r@intel.com>");
 MODULE_DESCRIPTION("Intel Medfield Platform Thermal Driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
index 5747f63c8d9f490c51edb31f5fc836ee89dab33f..3c0438ba385ee8494c14a4d24147201e04dcaee6 100644 (file)
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
- * intel_oaktrail.c - Intel OakTrail Platform support.
+ * Intel OakTrail Platform support
  *
  * Copyright (C) 2010-2011 Intel Corporation
  * Author: Yin Kangkai (kangkai.yin@intel.com)
@@ -8,21 +9,6 @@
  * <cezary.jackiewicz (at) gmail.com>, based on MSI driver
  * Copyright (C) 2006 Lennart Poettering <mzxreary (at) 0pointer (dot) de>
  *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- *  02110-1301, USA.
- *
  * This driver does below things:
  * 1. registers itself in the Linux backlight control in
  *    /sys/class/backlight/intel_oaktrail/
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/acpi.h>
-#include <linux/fb.h>
-#include <linux/mutex.h>
+#include <linux/backlight.h>
+#include <linux/dmi.h>
 #include <linux/err.h>
+#include <linux/fb.h>
 #include <linux/i2c.h>
-#include <linux/backlight.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/platform_device.h>
-#include <linux/dmi.h>
 #include <linux/rfkill.h>
+
 #include <acpi/video.h>
 
 #define DRIVER_NAME    "intel_oaktrail"
index 2d272a3e017621365de7d38dfbfa68bef7d73ab2..6b31d410cb09abeb57aec266a7896c4dbbec1dc3 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Intel Core SoC Power Management Controller Driver
  *
@@ -6,16 +7,6 @@
  *
  * Authors: Rajneesh Bhardwaj <rajneesh.bhardwaj@intel.com>
  *          Vishwanath Somayaji <vishwanath.somayaji@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
index 93a7e99e1f8b8a0a8d241416c1540cc66c779f20..be045348ad86b2674c3b08b3c1af54b3844ea339 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Intel Core SoC Power Management Controller Header File
  *
@@ -6,16 +7,6 @@
  *
  * Authors: Rajneesh Bhardwaj <rajneesh.bhardwaj@intel.com>
  *          Vishwanath Somayaji <vishwanath.somayaji@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
  */
 
 #ifndef PMC_CORE_H
index e7edc8c6393674dfdcfcf53cccb80da50fd255f2..7964ba22ef8d997d31e3cd83af9a56feb90dd088 100644 (file)
@@ -1,39 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * intel_pmc_ipc.c: Driver for the Intel PMC IPC mechanism
+ * Driver for the Intel PMC IPC mechanism
  *
  * (C) Copyright 2014-2015 Intel Corporation
  *
- * This driver is based on Intel SCU IPC driver(intel_scu_opc.c) by
+ * This driver is based on Intel SCU IPC driver(intel_scu_ipc.c) by
  *     Sreedhara DS <sreedhara.ds@intel.com>
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- *
  * PMC running in ARC processor communicates with other entity running in IA
  * core through IPC mechanism which in turn messaging between IA core ad PMC.
  */
 
-#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/atomic.h>
+#include <linux/bitops.h>
 #include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/init.h>
 #include <linux/device.h>
-#include <linux/pm.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
-#include <linux/interrupt.h>
+#include <linux/pm.h>
 #include <linux/pm_qos.h>
-#include <linux/kernel.h>
-#include <linux/bitops.h>
 #include <linux/sched.h>
-#include <linux/atomic.h>
-#include <linux/notifier.h>
-#include <linux/suspend.h>
-#include <linux/acpi.h>
-#include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/spinlock.h>
+#include <linux/suspend.h>
 
 #include <asm/intel_pmc_ipc.h>
 
@@ -1029,7 +1024,7 @@ static void __exit intel_pmc_ipc_exit(void)
 
 MODULE_AUTHOR("Zha Qipeng <qipeng.zha@intel.com>");
 MODULE_DESCRIPTION("Intel PMC IPC driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 
 /* Some modules are dependent on this, so init earlier */
 fs_initcall(intel_pmc_ipc_init);
index 2efeab650345c044371aa7c56624e4d650ea7c93..79671927f4ef6ada5833419c58a8aeb23fd1621f 100644 (file)
@@ -1,25 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Driver for the Intel P-Unit Mailbox IPC mechanism
  *
  * (C) Copyright 2015 Intel Corporation
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * The heart of the P-Unit is the Foxton microcontroller and its firmware,
  * which provide mailbox interface for power management usage.
  */
 
-#include <linux/module.h>
-#include <linux/mod_devicetable.h>
 #include <linux/acpi.h>
-#include <linux/delay.h>
 #include <linux/bitops.h>
+#include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
 #include <linux/platform_device.h>
+
 #include <asm/intel_punit_ipc.h>
 
 /* IPC Mailbox registers */
index 75c8fef7a482c41717c69aa8ef98b21ef4dd1043..cdab916fbf92775320400d1879e112dfdc8e4034 100644 (file)
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * intel_scu_ipc.c: Driver for the Intel SCU IPC mechanism
+ * Driver for the Intel SCU IPC mechanism
  *
  * (C) Copyright 2008-2010,2015 Intel Corporation
  * Author: Sreedhara DS (sreedhara.ds@intel.com)
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- *
  * SCU running in ARC processor communicates with other entity running in IA
  * core through IPC mechanism which in turn messaging between IA core ad SCU.
  * SCU has two IPC mechanism IPC-1 and IPC-2. IPC-1 is used between IA32 and
  * IPC-1 Driver provides an API for power control unit registers (e.g. MSIC)
  * along with other APIs.
  */
+
 #include <linux/delay.h>
+#include <linux/device.h>
 #include <linux/errno.h>
 #include <linux/init.h>
-#include <linux/device.h>
-#include <linux/pm.h>
-#include <linux/pci.h>
 #include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/pm.h>
 #include <linux/sfi.h>
+
 #include <asm/intel-mid.h>
 #include <asm/intel_scu_ipc.h>
 
index aa454241489c9f541864f34828e19318514d3560..8afe6fa06d7b8d45a1f9f68e5483f1e191f62e91 100644 (file)
@@ -1,32 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * intel_scu_ipc.c: Driver for the Intel SCU IPC mechanism
+ * Driver for the Intel SCU IPC mechanism
  *
  * (C) Copyright 2008-2010 Intel Corporation
  * Author: Sreedhara DS (sreedhara.ds@intel.com)
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- *
- * This driver provides ioctl interfaces to call intel scu ipc driver api
+ * This driver provides IOCTL interfaces to call Intel SCU IPC driver API.
  */
 
-#include <linux/module.h>
-#include <linux/kernel.h>
 #include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/fs.h>
 #include <linux/fcntl.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/sched.h>
-#include <linux/uaccess.h>
 #include <linux/slab.h>
-#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+
 #include <asm/intel_scu_ipc.h>
 
 static int major;
 
-/* ioctl commnds */
+/* IOCTL commands */
 #define        INTE_SCU_IPC_REGISTER_READ      0
 #define INTE_SCU_IPC_REGISTER_WRITE    1
 #define INTE_SCU_IPC_REGISTER_UPDATE   2
index f378621b5fe9d86632a853a33599b1efff88b05a..d4040bb222b485ead8c8caa8a1b3c4463e72dffa 100644 (file)
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Intel SoC Core Telemetry Driver
  * Copyright (C) 2015, Intel Corporation.
  * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
  * Telemetry Framework provides platform related PM and performance statistics.
  * This file provides the core telemetry API implementation.
  */
@@ -460,4 +452,4 @@ module_exit(telemetry_module_exit);
 
 MODULE_AUTHOR("Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>");
 MODULE_DESCRIPTION("Intel SoC Telemetry Interface");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
index cee08f23629245803783734af6229ac18c69e3e6..40bce560eb30d9cf2ca1a348d7b1eece30d00616 100644 (file)
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Intel SOC Telemetry debugfs Driver: Currently supports APL
  * Copyright (c) 2015, Intel Corporation.
  * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
  * This file provides the debugfs interfaces for telemetry.
  * /sys/kernel/debug/telemetry/pss_info: Shows Primary Control Sub-Sys Counters
  * /sys/kernel/debug/telemetry/ioss_info: Shows IO Sub-System Counters
@@ -72,9 +64,6 @@
 #define TELEM_IOSS_DX_D0IX_EVTS                25
 #define TELEM_IOSS_PG_EVTS             30
 
-#define TELEM_DEBUGFS_CPU(model, data) \
-       { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&data}
-
 #define TELEM_CHECK_AND_PARSE_EVTS(EVTID, EVTNUM, BUF, EVTLOG, EVTDAT, MASK) { \
        if (evtlog[index].telem_evtid == (EVTID)) { \
                for (idx = 0; idx < (EVTNUM); idx++) \
@@ -319,8 +308,8 @@ static struct telemetry_debugfs_conf telem_apl_debugfs_conf = {
 };
 
 static const struct x86_cpu_id telemetry_debugfs_cpu_ids[] = {
-       TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GOLDMONT, telem_apl_debugfs_conf),
-       TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, telem_apl_debugfs_conf),
+       INTEL_CPU_FAM6(ATOM_GOLDMONT, telem_apl_debugfs_conf),
+       INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS, telem_apl_debugfs_conf),
        {}
 };
 
@@ -951,12 +940,16 @@ static int __init telemetry_debugfs_init(void)
        debugfs_conf = (struct telemetry_debugfs_conf *)id->driver_data;
 
        err = telemetry_pltconfig_valid();
-       if (err < 0)
+       if (err < 0) {
+               pr_info("Invalid pltconfig, ensure IPC1 device is enabled in BIOS\n");
                return -ENODEV;
+       }
 
        err = telemetry_debugfs_check_evts();
-       if (err < 0)
+       if (err < 0) {
+               pr_info("telemetry_debugfs_check_evts failed\n");
                return -EINVAL;
+       }
 
        register_pm_notifier(&pm_notifier);
 
@@ -1037,4 +1030,4 @@ module_exit(telemetry_debugfs_exit);
 MODULE_AUTHOR("Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>");
 MODULE_DESCRIPTION("Intel SoC Telemetry debugfs Interface");
 MODULE_VERSION(DRIVER_VERSION);
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
index fcc6bee51a422a1c95e205f0d2874fc746ed09e2..df8565bad595c72ba7b889a7d50afe2e0e75d9c6 100644 (file)
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Intel SOC Telemetry Platform Driver: Currently supports APL
  * Copyright (c) 2015, Intel Corporation.
  * All Rights Reserved.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
  * This file provides the platform specific telemetry implementation for APL.
  * It used the PUNIT and PMC IPC interfaces for configuring the counters.
  * The accumulated results are fetched from SRAM.
@@ -1242,4 +1234,4 @@ module_exit(telemetry_module_exit);
 MODULE_AUTHOR("Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>");
 MODULE_DESCRIPTION("Intel SoC Telemetry Platform Driver");
 MODULE_VERSION(DRIVER_VERSION);
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
index a6d5aa0c3c479dbd7f345a3f5666ae0ef72e456d..7b9cc841ab6557157a71691cf131cd43cc7ecb9c 100644 (file)
@@ -1,28 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Intel Turbo Boost Max Technology 3.0 legacy (non HWP) enumeration driver
  * Copyright (c) 2017, Intel Corporation.
  * All rights reserved.
  *
  * Author: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#include <linux/kernel.h>
+#include <linux/cpufeature.h>
+#include <linux/cpuhotplug.h>
 #include <linux/init.h>
+#include <linux/kernel.h>
 #include <linux/topology.h>
 #include <linux/workqueue.h>
-#include <linux/cpuhotplug.h>
-#include <linux/cpufeature.h>
+
 #include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
 
diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c
new file mode 100644 (file)
index 0000000..c0bb1f8
--- /dev/null
@@ -0,0 +1,700 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * lg-laptop.c - LG Gram ACPI features and hotkeys Driver
+ *
+ * Copyright (C) 2018 Matan Ziv-Av <matan@svgalib.org>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/acpi.h>
+#include <linux/input.h>
+#include <linux/input/sparse-keymap.h>
+#include <linux/kernel.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+#define LED_DEVICE(_name, max) struct led_classdev _name = { \
+       .name           = __stringify(_name),   \
+       .max_brightness = max,                  \
+       .brightness_set = _name##_set,          \
+       .brightness_get = _name##_get,          \
+}
+
+MODULE_AUTHOR("Matan Ziv-Av");
+MODULE_DESCRIPTION("LG WMI Hotkey Driver");
+MODULE_LICENSE("GPL");
+
+#define WMI_EVENT_GUID0        "E4FB94F9-7F2B-4173-AD1A-CD1D95086248"
+#define WMI_EVENT_GUID1        "023B133E-49D1-4E10-B313-698220140DC2"
+#define WMI_EVENT_GUID2        "37BE1AC0-C3F2-4B1F-BFBE-8FDEAF2814D6"
+#define WMI_EVENT_GUID3        "911BAD44-7DF8-4FBB-9319-BABA1C4B293B"
+#define WMI_METHOD_WMAB "C3A72B38-D3EF-42D3-8CBB-D5A57049F66D"
+#define WMI_METHOD_WMBB "2B4F501A-BD3C-4394-8DCF-00A7D2BC8210"
+#define WMI_EVENT_GUID  WMI_EVENT_GUID0
+
+#define WMAB_METHOD     "\\XINI.WMAB"
+#define WMBB_METHOD     "\\XINI.WMBB"
+#define SB_GGOV_METHOD  "\\_SB.GGOV"
+#define GOV_TLED        0x2020008
+#define WM_GET          1
+#define WM_SET          2
+#define WM_KEY_LIGHT    0x400
+#define WM_TLED         0x404
+#define WM_FN_LOCK      0x407
+#define WM_BATT_LIMIT   0x61
+#define WM_READER_MODE  0xBF
+#define WM_FAN_MODE    0x33
+#define WMBB_USB_CHARGE 0x10B
+#define WMBB_BATT_LIMIT 0x10C
+
+#define PLATFORM_NAME   "lg-laptop"
+
+MODULE_ALIAS("wmi:" WMI_EVENT_GUID0);
+MODULE_ALIAS("wmi:" WMI_EVENT_GUID1);
+MODULE_ALIAS("wmi:" WMI_EVENT_GUID2);
+MODULE_ALIAS("wmi:" WMI_EVENT_GUID3);
+MODULE_ALIAS("wmi:" WMI_METHOD_WMAB);
+MODULE_ALIAS("wmi:" WMI_METHOD_WMBB);
+MODULE_ALIAS("acpi*:LGEX0815:*");
+
+static struct platform_device *pf_device;
+static struct input_dev *wmi_input_dev;
+
+static u32 inited;
+#define INIT_INPUT_WMI_0        0x01
+#define INIT_INPUT_WMI_2        0x02
+#define INIT_INPUT_ACPI         0x04
+#define INIT_TPAD_LED           0x08
+#define INIT_KBD_LED            0x10
+#define INIT_SPARSE_KEYMAP        0x80
+
+static const struct key_entry wmi_keymap[] = {
+       {KE_KEY, 0x70, {KEY_F15} },      /* LG control panel (F1) */
+       {KE_KEY, 0x74, {KEY_F13} },      /* Touchpad toggle (F5) */
+       {KE_KEY, 0xf020000, {KEY_F14} }, /* Read mode (F9) */
+       {KE_KEY, 0x10000000, {KEY_F16} },/* Keyboard backlight (F8) - pressing
+                                         * this key both sends an event and
+                                         * changes backlight level.
+                                         */
+       {KE_KEY, 0x80, {KEY_RFKILL} },
+       {KE_END, 0}
+};
+
+static int ggov(u32 arg0)
+{
+       union acpi_object args[1];
+       union acpi_object *r;
+       acpi_status status;
+       acpi_handle handle;
+       struct acpi_object_list arg;
+       struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+       int res;
+
+       args[0].type = ACPI_TYPE_INTEGER;
+       args[0].integer.value = arg0;
+
+       status = acpi_get_handle(NULL, (acpi_string) SB_GGOV_METHOD, &handle);
+       if (ACPI_FAILURE(status)) {
+               pr_err("Cannot get handle");
+               return -ENODEV;
+       }
+
+       arg.count = 1;
+       arg.pointer = args;
+
+       status = acpi_evaluate_object(handle, NULL, &arg, &buffer);
+       if (ACPI_FAILURE(status)) {
+               acpi_handle_err(handle, "GGOV: call failed.\n");
+               return -EINVAL;
+       }
+
+       r = buffer.pointer;
+       if (r->type != ACPI_TYPE_INTEGER) {
+               kfree(r);
+               return -EINVAL;
+       }
+
+       res = r->integer.value;
+       kfree(r);
+
+       return res;
+}
+
/*
 * Evaluate the \XINI.WMAB method with (method, arg1, arg2) as three
 * integer arguments.
 *
 * Returns the ACPI result object (caller must kfree() it) or NULL if
 * the handle lookup or the evaluation fails.
 */
static union acpi_object *lg_wmab(u32 method, u32 arg1, u32 arg2)
{
	union acpi_object args[3];
	acpi_status status;
	acpi_handle handle;
	struct acpi_object_list arg;
	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };

	args[0].type = ACPI_TYPE_INTEGER;
	args[0].integer.value = method;
	args[1].type = ACPI_TYPE_INTEGER;
	args[1].integer.value = arg1;
	args[2].type = ACPI_TYPE_INTEGER;
	args[2].integer.value = arg2;

	status = acpi_get_handle(NULL, (acpi_string) WMAB_METHOD, &handle);
	if (ACPI_FAILURE(status)) {
		pr_err("Cannot get handle");
		return NULL;
	}

	arg.count = 3;
	arg.pointer = args;

	/* ACPICA allocates the result buffer; ownership passes to caller. */
	status = acpi_evaluate_object(handle, NULL, &arg, &buffer);
	if (ACPI_FAILURE(status)) {
		acpi_handle_err(handle, "WMAB: call failed.\n");
		return NULL;
	}

	return buffer.pointer;
}
+
+static union acpi_object *lg_wmbb(u32 method_id, u32 arg1, u32 arg2)
+{
+       union acpi_object args[3];
+       acpi_status status;
+       acpi_handle handle;
+       struct acpi_object_list arg;
+       struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+       u8 buf[32];
+
+       *(u32 *)buf = method_id;
+       *(u32 *)(buf + 4) = arg1;
+       *(u32 *)(buf + 16) = arg2;
+       args[0].type = ACPI_TYPE_INTEGER;
+       args[0].integer.value = 0; /* ignored */
+       args[1].type = ACPI_TYPE_INTEGER;
+       args[1].integer.value = 1; /* Must be 1 or 2. Does not matter which */
+       args[2].type = ACPI_TYPE_BUFFER;
+       args[2].buffer.length = 32;
+       args[2].buffer.pointer = buf;
+
+       status = acpi_get_handle(NULL, (acpi_string)WMBB_METHOD, &handle);
+       if (ACPI_FAILURE(status)) {
+               pr_err("Cannot get handle");
+               return NULL;
+       }
+
+       arg.count = 3;
+       arg.pointer = args;
+
+       status = acpi_evaluate_object(handle, NULL, &arg, &buffer);
+       if (ACPI_FAILURE(status)) {
+               acpi_handle_err(handle, "WMAB: call failed.\n");
+               return NULL;
+       }
+
+       return (union acpi_object *)buffer.pointer;
+}
+
/*
 * WMI event callback: fetch the event payload and, when it is an
 * integer scancode, forward it through the sparse keymap as a key
 * press/release pair.
 */
static void wmi_notify(u32 value, void *context)
{
	struct acpi_buffer response = { ACPI_ALLOCATE_BUFFER, NULL };
	union acpi_object *obj;
	acpi_status status;
	long data = (long)context;	/* GUID index passed at registration time */

	pr_debug("event guid %li\n", data);
	status = wmi_get_event_data(value, &response);
	if (ACPI_FAILURE(status)) {
		pr_err("Bad event status 0x%x\n", status);
		return;
	}

	obj = (union acpi_object *)response.pointer;
	if (!obj)
		return;

	if (obj->type == ACPI_TYPE_INTEGER) {
		int eventcode = obj->integer.value;
		struct key_entry *key;

		key =
		    sparse_keymap_entry_from_scancode(wmi_input_dev, eventcode);
		if (key && key->type == KE_KEY)
			sparse_keymap_report_entry(wmi_input_dev, key, 1, true);
	}

	/* NOTE(review): integer.value is read here even for non-integer
	 * objects; the printed value is only meaningful when type is
	 * ACPI_TYPE_INTEGER. */
	pr_debug("Type: %i    Eventcode: 0x%llx\n", obj->type,
		 obj->integer.value);
	kfree(response.pointer);
}
+
+static void wmi_input_setup(void)
+{
+       acpi_status status;
+
+       wmi_input_dev = input_allocate_device();
+       if (wmi_input_dev) {
+               wmi_input_dev->name = "LG WMI hotkeys";
+               wmi_input_dev->phys = "wmi/input0";
+               wmi_input_dev->id.bustype = BUS_HOST;
+
+               if (sparse_keymap_setup(wmi_input_dev, wmi_keymap, NULL) ||
+                   input_register_device(wmi_input_dev)) {
+                       pr_info("Cannot initialize input device");
+                       input_free_device(wmi_input_dev);
+                       return;
+               }
+
+               inited |= INIT_SPARSE_KEYMAP;
+               status = wmi_install_notify_handler(WMI_EVENT_GUID0, wmi_notify,
+                                                   (void *)0);
+               if (ACPI_SUCCESS(status))
+                       inited |= INIT_INPUT_WMI_0;
+
+               status = wmi_install_notify_handler(WMI_EVENT_GUID2, wmi_notify,
+                                                   (void *)2);
+               if (ACPI_SUCCESS(status))
+                       inited |= INIT_INPUT_WMI_2;
+       } else {
+               pr_info("Cannot allocate input device");
+       }
+}
+
+static void acpi_notify(struct acpi_device *device, u32 event)
+{
+       struct key_entry *key;
+
+       acpi_handle_debug(device->handle, "notify: %d\n", event);
+       if (inited & INIT_SPARSE_KEYMAP) {
+               key = sparse_keymap_entry_from_scancode(wmi_input_dev, 0x80);
+               if (key && key->type == KE_KEY)
+                       sparse_keymap_report_entry(wmi_input_dev, key, 1, true);
+       }
+}
+
/*
 * sysfs store for fan_mode: accepts a boolean, reads the current mode
 * word and issues two WM_SET calls updating bit 4 and bit 0.
 */
static ssize_t fan_mode_store(struct device *dev,
			      struct device_attribute *attr,
			      const char *buffer, size_t count)
{
	bool value;
	union acpi_object *r;
	u32 m;
	int ret;

	ret = kstrtobool(buffer, &value);
	if (ret)
		return ret;

	r = lg_wmab(WM_FAN_MODE, WM_GET, 0);
	if (!r)
		return -EIO;

	if (r->type != ACPI_TYPE_INTEGER) {
		kfree(r);
		return -EIO;
	}

	m = r->integer.value;
	kfree(r);
	/* NOTE(review): the second WM_SET recomputes bits 4-7 from the
	 * stale value 'm' read above, which rewrites what the first call
	 * just set - presumably intentional for this firmware; confirm. */
	r = lg_wmab(WM_FAN_MODE, WM_SET, (m & 0xffffff0f) | (value << 4));
	kfree(r);
	r = lg_wmab(WM_FAN_MODE, WM_SET, (m & 0xfffffff0) | value);
	kfree(r);

	return count;
}
+
+static ssize_t fan_mode_show(struct device *dev,
+                            struct device_attribute *attr, char *buffer)
+{
+       unsigned int status;
+       union acpi_object *r;
+
+       r = lg_wmab(WM_FAN_MODE, WM_GET, 0);
+       if (!r)
+               return -EIO;
+
+       if (r->type != ACPI_TYPE_INTEGER) {
+               kfree(r);
+               return -EIO;
+       }
+
+       status = r->integer.value & 0x01;
+       kfree(r);
+
+       return snprintf(buffer, PAGE_SIZE, "%d\n", status);
+}
+
+static ssize_t usb_charge_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buffer, size_t count)
+{
+       bool value;
+       union acpi_object *r;
+       int ret;
+
+       ret = kstrtobool(buffer, &value);
+       if (ret)
+               return ret;
+
+       r = lg_wmbb(WMBB_USB_CHARGE, WM_SET, value);
+       if (!r)
+               return -EIO;
+
+       kfree(r);
+       return count;
+}
+
+static ssize_t usb_charge_show(struct device *dev,
+                              struct device_attribute *attr, char *buffer)
+{
+       unsigned int status;
+       union acpi_object *r;
+
+       r = lg_wmbb(WMBB_USB_CHARGE, WM_GET, 0);
+       if (!r)
+               return -EIO;
+
+       if (r->type != ACPI_TYPE_BUFFER) {
+               kfree(r);
+               return -EIO;
+       }
+
+       status = !!r->buffer.pointer[0x10];
+
+       kfree(r);
+
+       return snprintf(buffer, PAGE_SIZE, "%d\n", status);
+}
+
+static ssize_t reader_mode_store(struct device *dev,
+                                struct device_attribute *attr,
+                                const char *buffer, size_t count)
+{
+       bool value;
+       union acpi_object *r;
+       int ret;
+
+       ret = kstrtobool(buffer, &value);
+       if (ret)
+               return ret;
+
+       r = lg_wmab(WM_READER_MODE, WM_SET, value);
+       if (!r)
+               return -EIO;
+
+       kfree(r);
+       return count;
+}
+
+static ssize_t reader_mode_show(struct device *dev,
+                               struct device_attribute *attr, char *buffer)
+{
+       unsigned int status;
+       union acpi_object *r;
+
+       r = lg_wmab(WM_READER_MODE, WM_GET, 0);
+       if (!r)
+               return -EIO;
+
+       if (r->type != ACPI_TYPE_INTEGER) {
+               kfree(r);
+               return -EIO;
+       }
+
+       status = !!r->integer.value;
+
+       kfree(r);
+
+       return snprintf(buffer, PAGE_SIZE, "%d\n", status);
+}
+
+static ssize_t fn_lock_store(struct device *dev,
+                            struct device_attribute *attr,
+                            const char *buffer, size_t count)
+{
+       bool value;
+       union acpi_object *r;
+       int ret;
+
+       ret = kstrtobool(buffer, &value);
+       if (ret)
+               return ret;
+
+       r = lg_wmab(WM_FN_LOCK, WM_SET, value);
+       if (!r)
+               return -EIO;
+
+       kfree(r);
+       return count;
+}
+
+static ssize_t fn_lock_show(struct device *dev,
+                           struct device_attribute *attr, char *buffer)
+{
+       unsigned int status;
+       union acpi_object *r;
+
+       r = lg_wmab(WM_FN_LOCK, WM_GET, 0);
+       if (!r)
+               return -EIO;
+
+       if (r->type != ACPI_TYPE_BUFFER) {
+               kfree(r);
+               return -EIO;
+       }
+
+       status = !!r->buffer.pointer[0];
+       kfree(r);
+
+       return snprintf(buffer, PAGE_SIZE, "%d\n", status);
+}
+
+static ssize_t battery_care_limit_store(struct device *dev,
+                                       struct device_attribute *attr,
+                                       const char *buffer, size_t count)
+{
+       unsigned long value;
+       int ret;
+
+       ret = kstrtoul(buffer, 10, &value);
+       if (ret)
+               return ret;
+
+       if (value == 100 || value == 80) {
+               union acpi_object *r;
+
+               r = lg_wmab(WM_BATT_LIMIT, WM_SET, value);
+               if (!r)
+                       return -EIO;
+
+               kfree(r);
+               return count;
+       }
+
+       return -EINVAL;
+}
+
+static ssize_t battery_care_limit_show(struct device *dev,
+                                      struct device_attribute *attr,
+                                      char *buffer)
+{
+       unsigned int status;
+       union acpi_object *r;
+
+       r = lg_wmab(WM_BATT_LIMIT, WM_GET, 0);
+       if (!r)
+               return -EIO;
+
+       if (r->type != ACPI_TYPE_INTEGER) {
+               kfree(r);
+               return -EIO;
+       }
+
+       status = r->integer.value;
+       kfree(r);
+       if (status != 80 && status != 100)
+               status = 0;
+
+       return snprintf(buffer, PAGE_SIZE, "%d\n", status);
+}
+
+static DEVICE_ATTR_RW(fan_mode);
+static DEVICE_ATTR_RW(usb_charge);
+static DEVICE_ATTR_RW(reader_mode);
+static DEVICE_ATTR_RW(fn_lock);
+static DEVICE_ATTR_RW(battery_care_limit);
+
+static struct attribute *dev_attributes[] = {
+       &dev_attr_fan_mode.attr,
+       &dev_attr_usb_charge.attr,
+       &dev_attr_reader_mode.attr,
+       &dev_attr_fn_lock.attr,
+       &dev_attr_battery_care_limit.attr,
+       NULL
+};
+
+static const struct attribute_group dev_attribute_group = {
+       .attrs = dev_attributes,
+};
+
+static void tpad_led_set(struct led_classdev *cdev,
+                        enum led_brightness brightness)
+{
+       union acpi_object *r;
+
+       r = lg_wmab(WM_TLED, WM_SET, brightness > LED_OFF);
+       kfree(r);
+}
+
+static enum led_brightness tpad_led_get(struct led_classdev *cdev)
+{
+       return ggov(GOV_TLED) > 0 ? LED_ON : LED_OFF;
+}
+
+static LED_DEVICE(tpad_led, 1);
+
+static void kbd_backlight_set(struct led_classdev *cdev,
+                             enum led_brightness brightness)
+{
+       u32 val;
+       union acpi_object *r;
+
+       val = 0x22;
+       if (brightness <= LED_OFF)
+               val = 0;
+       if (brightness >= LED_FULL)
+               val = 0x24;
+       r = lg_wmab(WM_KEY_LIGHT, WM_SET, val);
+       kfree(r);
+}
+
+static enum led_brightness kbd_backlight_get(struct led_classdev *cdev)
+{
+       union acpi_object *r;
+       int val;
+
+       r = lg_wmab(WM_KEY_LIGHT, WM_GET, 0);
+
+       if (!r)
+               return LED_OFF;
+
+       if (r->type != ACPI_TYPE_BUFFER || r->buffer.pointer[1] != 0x05) {
+               kfree(r);
+               return LED_OFF;
+       }
+
+       switch (r->buffer.pointer[0] & 0x27) {
+       case 0x24:
+               val = LED_FULL;
+               break;
+       case 0x22:
+               val = LED_HALF;
+               break;
+       default:
+               val = LED_OFF;
+       }
+
+       kfree(r);
+
+       return val;
+}
+
+static LED_DEVICE(kbd_backlight, 255);
+
/*
 * Tear down what wmi_input_setup() created, in reverse order (notify
 * handlers first, then the input device), and clear the corresponding
 * bits in 'inited'.
 */
static void wmi_input_destroy(void)
{
	if (inited & INIT_INPUT_WMI_2)
		wmi_remove_notify_handler(WMI_EVENT_GUID2);

	if (inited & INIT_INPUT_WMI_0)
		wmi_remove_notify_handler(WMI_EVENT_GUID0);

	if (inited & INIT_SPARSE_KEYMAP)
		input_unregister_device(wmi_input_dev);

	inited &= ~(INIT_INPUT_WMI_0 | INIT_INPUT_WMI_2 | INIT_SPARSE_KEYMAP);
}
+
+static struct platform_driver pf_driver = {
+       .driver = {
+                  .name = PLATFORM_NAME,
+       }
+};
+
+static int acpi_add(struct acpi_device *device)
+{
+       int ret;
+
+       if (pf_device)
+               return 0;
+
+       ret = platform_driver_register(&pf_driver);
+       if (ret)
+               return ret;
+
+       pf_device = platform_device_register_simple(PLATFORM_NAME,
+                                                   PLATFORM_DEVID_NONE,
+                                                   NULL, 0);
+       if (IS_ERR(pf_device)) {
+               ret = PTR_ERR(pf_device);
+               pf_device = NULL;
+               pr_err("unable to register platform device\n");
+               goto out_platform_registered;
+       }
+
+       ret = sysfs_create_group(&pf_device->dev.kobj, &dev_attribute_group);
+       if (ret)
+               goto out_platform_device;
+
+       if (!led_classdev_register(&pf_device->dev, &kbd_backlight))
+               inited |= INIT_KBD_LED;
+
+       if (!led_classdev_register(&pf_device->dev, &tpad_led))
+               inited |= INIT_TPAD_LED;
+
+       wmi_input_setup();
+
+       return 0;
+
+out_platform_device:
+       platform_device_unregister(pf_device);
+out_platform_registered:
+       platform_driver_unregister(&pf_driver);
+       return ret;
+}
+
/*
 * ACPI remove callback: undo everything acpi_add() set up. The optional
 * pieces (LEDs, WMI input) are only torn down if their 'inited' flag
 * was set.
 */
static int acpi_remove(struct acpi_device *device)
{
	sysfs_remove_group(&pf_device->dev.kobj, &dev_attribute_group);
	if (inited & INIT_KBD_LED)
		led_classdev_unregister(&kbd_backlight);

	if (inited & INIT_TPAD_LED)
		led_classdev_unregister(&tpad_led);

	wmi_input_destroy();
	platform_device_unregister(pf_device);
	pf_device = NULL;
	platform_driver_unregister(&pf_driver);

	return 0;
}
+
+static const struct acpi_device_id device_ids[] = {
+       {"LGEX0815", 0},
+       {"", 0}
+};
+MODULE_DEVICE_TABLE(acpi, device_ids);
+
+static struct acpi_driver acpi_driver = {
+       .name = "LG Gram Laptop Support",
+       .class = "lg-laptop",
+       .ids = device_ids,
+       .ops = {
+               .add = acpi_add,
+               .remove = acpi_remove,
+               .notify = acpi_notify,
+               },
+       .owner = THIS_MODULE,
+};
+
+static int __init acpi_init(void)
+{
+       int result;
+
+       result = acpi_bus_register_driver(&acpi_driver);
+       if (result < 0) {
+               ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Error registering driver\n"));
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
/* Module exit: unregister the ACPI driver (triggers acpi_remove()). */
static void __exit acpi_exit(void)
{
	acpi_bus_unregister_driver(&acpi_driver);
}
+
+module_init(acpi_init);
+module_exit(acpi_exit);
index d89936c93ba0fe069900d739e6f8d8ac1e02f902..c2c3a1a19879596bd905952a9e028e96ea7cba01 100644 (file)
@@ -575,7 +575,7 @@ static struct mlxreg_core_item mlxplat_mlxcpld_msn201x_items[] = {
 
 static
 struct mlxreg_core_hotplug_platform_data mlxplat_mlxcpld_msn201x_data = {
-       .items = mlxplat_mlxcpld_msn21xx_items,
+       .items = mlxplat_mlxcpld_msn201x_items,
        .counter = ARRAY_SIZE(mlxplat_mlxcpld_msn201x_items),
        .cell = MLXPLAT_CPLD_LPC_REG_AGGR_OFFSET,
        .mask = MLXPLAT_CPLD_AGGR_MASK_DEF,
index cb204f9734913a4cc40b656bb125f8ac1e3c22b4..5f2d7ea912b56e89beb0fa75f8d502e1ef3e9f99 100644 (file)
@@ -42,10 +42,13 @@ static const struct ts_dmi_data chuwi_hi8_data = {
 };
 
 static const struct property_entry chuwi_hi8_pro_props[] = {
+       PROPERTY_ENTRY_U32("touchscreen-min-x", 6),
+       PROPERTY_ENTRY_U32("touchscreen-min-y", 3),
        PROPERTY_ENTRY_U32("touchscreen-size-x", 1728),
        PROPERTY_ENTRY_U32("touchscreen-size-y", 1148),
        PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
        PROPERTY_ENTRY_STRING("firmware-name", "gsl3680-chuwi-hi8-pro.fw"),
+       PROPERTY_ENTRY_U32("silead,max-fingers", 10),
        PROPERTY_ENTRY_BOOL("silead,home-button"),
        { }
 };
@@ -56,6 +59,8 @@ static const struct ts_dmi_data chuwi_hi8_pro_data = {
 };
 
 static const struct property_entry chuwi_vi8_props[] = {
+       PROPERTY_ENTRY_U32("touchscreen-min-x", 4),
+       PROPERTY_ENTRY_U32("touchscreen-min-y", 6),
        PROPERTY_ENTRY_U32("touchscreen-size-x", 1724),
        PROPERTY_ENTRY_U32("touchscreen-size-y", 1140),
        PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
@@ -88,9 +93,9 @@ static const struct ts_dmi_data chuwi_vi10_data = {
 
 static const struct property_entry connect_tablet9_props[] = {
        PROPERTY_ENTRY_U32("touchscreen-min-x", 9),
-       PROPERTY_ENTRY_U32("touchscreen-min-y", 8),
+       PROPERTY_ENTRY_U32("touchscreen-min-y", 10),
        PROPERTY_ENTRY_U32("touchscreen-size-x", 1664),
-       PROPERTY_ENTRY_U32("touchscreen-size-y", 878),
+       PROPERTY_ENTRY_U32("touchscreen-size-y", 880),
        PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
        PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
        PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-connect-tablet9.fw"),
@@ -104,8 +109,10 @@ static const struct ts_dmi_data connect_tablet9_data = {
 };
 
 static const struct property_entry cube_iwork8_air_props[] = {
-       PROPERTY_ENTRY_U32("touchscreen-size-x", 1660),
-       PROPERTY_ENTRY_U32("touchscreen-size-y", 900),
+       PROPERTY_ENTRY_U32("touchscreen-min-x", 1),
+       PROPERTY_ENTRY_U32("touchscreen-min-y", 3),
+       PROPERTY_ENTRY_U32("touchscreen-size-x", 1664),
+       PROPERTY_ENTRY_U32("touchscreen-size-y", 896),
        PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
        PROPERTY_ENTRY_STRING("firmware-name", "gsl3670-cube-iwork8-air.fw"),
        PROPERTY_ENTRY_U32("silead,max-fingers", 10),
@@ -179,11 +186,14 @@ static const struct ts_dmi_data gp_electronic_t701_data = {
 };
 
 static const struct property_entry itworks_tw891_props[] = {
+       PROPERTY_ENTRY_U32("touchscreen-min-x", 1),
+       PROPERTY_ENTRY_U32("touchscreen-min-y", 5),
        PROPERTY_ENTRY_U32("touchscreen-size-x", 1600),
-       PROPERTY_ENTRY_U32("touchscreen-size-y", 890),
+       PROPERTY_ENTRY_U32("touchscreen-size-y", 896),
        PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
        PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
        PROPERTY_ENTRY_STRING("firmware-name", "gsl3670-itworks-tw891.fw"),
+       PROPERTY_ENTRY_U32("silead,max-fingers", 10),
        { }
 };
 
@@ -207,8 +217,10 @@ static const struct ts_dmi_data jumper_ezpad_6_pro_data = {
 };
 
 static const struct property_entry jumper_ezpad_mini3_props[] = {
+       PROPERTY_ENTRY_U32("touchscreen-min-x", 23),
+       PROPERTY_ENTRY_U32("touchscreen-min-y", 16),
        PROPERTY_ENTRY_U32("touchscreen-size-x", 1700),
-       PROPERTY_ENTRY_U32("touchscreen-size-y", 1150),
+       PROPERTY_ENTRY_U32("touchscreen-size-y", 1138),
        PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
        PROPERTY_ENTRY_STRING("firmware-name", "gsl3676-jumper-ezpad-mini3.fw"),
        PROPERTY_ENTRY_U32("silead,max-fingers", 10),
@@ -237,6 +249,24 @@ static const struct ts_dmi_data onda_obook_20_plus_data = {
        .properties     = onda_obook_20_plus_props,
 };
 
+static const struct property_entry onda_v80_plus_v3_props[] = {
+       PROPERTY_ENTRY_U32("touchscreen-min-x", 22),
+       PROPERTY_ENTRY_U32("touchscreen-min-y", 15),
+       PROPERTY_ENTRY_U32("touchscreen-size-x", 1698),
+       PROPERTY_ENTRY_U32("touchscreen-size-y", 1140),
+       PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
+       PROPERTY_ENTRY_STRING("firmware-name",
+                             "gsl3676-onda-v80-plus-v3.fw"),
+       PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+       PROPERTY_ENTRY_BOOL("silead,home-button"),
+       { }
+};
+
+static const struct ts_dmi_data onda_v80_plus_v3_data = {
+       .acpi_name      = "MSSL1680:00",
+       .properties     = onda_v80_plus_v3_props,
+};
+
 static const struct property_entry onda_v820w_32g_props[] = {
        PROPERTY_ENTRY_U32("touchscreen-size-x", 1665),
        PROPERTY_ENTRY_U32("touchscreen-size-y", 1140),
@@ -322,11 +352,14 @@ static const struct ts_dmi_data pov_mobii_wintab_p800w_v20_data = {
 };
 
 static const struct property_entry pov_mobii_wintab_p800w_v21_props[] = {
-       PROPERTY_ENTRY_U32("touchscreen-size-x", 1800),
-       PROPERTY_ENTRY_U32("touchscreen-size-y", 1150),
+       PROPERTY_ENTRY_U32("touchscreen-min-x", 1),
+       PROPERTY_ENTRY_U32("touchscreen-min-y", 8),
+       PROPERTY_ENTRY_U32("touchscreen-size-x", 1794),
+       PROPERTY_ENTRY_U32("touchscreen-size-y", 1148),
        PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
        PROPERTY_ENTRY_STRING("firmware-name",
                              "gsl3692-pov-mobii-wintab-p800w.fw"),
+       PROPERTY_ENTRY_U32("silead,max-fingers", 10),
        PROPERTY_ENTRY_BOOL("silead,home-button"),
        { }
 };
@@ -366,6 +399,22 @@ static const struct ts_dmi_data teclast_x98plus2_data = {
        .properties     = teclast_x98plus2_props,
 };
 
+static const struct property_entry trekstor_primebook_c11_props[] = {
+       PROPERTY_ENTRY_U32("touchscreen-size-x", 1970),
+       PROPERTY_ENTRY_U32("touchscreen-size-y", 1530),
+       PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
+       PROPERTY_ENTRY_STRING("firmware-name",
+                             "gsl1680-trekstor-primebook-c11.fw"),
+       PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+       PROPERTY_ENTRY_BOOL("silead,home-button"),
+       { }
+};
+
+static const struct ts_dmi_data trekstor_primebook_c11_data = {
+       .acpi_name      = "MSSL1680:00",
+       .properties     = trekstor_primebook_c11_props,
+};
+
 static const struct property_entry trekstor_primebook_c13_props[] = {
        PROPERTY_ENTRY_U32("touchscreen-size-x", 2624),
        PROPERTY_ENTRY_U32("touchscreen-size-y", 1920),
@@ -381,6 +430,22 @@ static const struct ts_dmi_data trekstor_primebook_c13_data = {
        .properties     = trekstor_primebook_c13_props,
 };
 
+static const struct property_entry trekstor_primetab_t13b_props[] = {
+       PROPERTY_ENTRY_U32("touchscreen-size-x", 2500),
+       PROPERTY_ENTRY_U32("touchscreen-size-y", 1900),
+       PROPERTY_ENTRY_STRING("firmware-name",
+                             "gsl1680-trekstor-primetab-t13b.fw"),
+       PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+       PROPERTY_ENTRY_BOOL("silead,home-button"),
+       PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
+       { }
+};
+
+static const struct ts_dmi_data trekstor_primetab_t13b_data = {
+       .acpi_name  = "MSSL1680:00",
+       .properties = trekstor_primetab_t13b_props,
+};
+
 static const struct property_entry trekstor_surftab_twin_10_1_props[] = {
        PROPERTY_ENTRY_U32("touchscreen-size-x", 1900),
        PROPERTY_ENTRY_U32("touchscreen-size-y", 1280),
@@ -397,6 +462,8 @@ static const struct ts_dmi_data trekstor_surftab_twin_10_1_data = {
 };
 
 static const struct property_entry trekstor_surftab_wintron70_props[] = {
+       PROPERTY_ENTRY_U32("touchscreen-min-x", 12),
+       PROPERTY_ENTRY_U32("touchscreen-min-y", 8),
        PROPERTY_ENTRY_U32("touchscreen-size-x", 884),
        PROPERTY_ENTRY_U32("touchscreen-size-y", 632),
        PROPERTY_ENTRY_STRING("firmware-name",
@@ -555,6 +622,14 @@ static const struct dmi_system_id touchscreen_dmi_table[] = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "OBOOK 20 PLUS"),
                },
        },
+       {
+               /* ONDA V80 plus v3 (P80PSBG9V3A01501) */
+               .driver_data = (void *)&onda_v80_plus_v3_data,
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ONDA"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "V80 PLUS")
+               },
+       },
        {
                /* ONDA V820w DualOS */
                .driver_data = (void *)&onda_v820w_32g_data,
@@ -640,6 +715,14 @@ static const struct dmi_system_id touchscreen_dmi_table[] = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "X98 Plus II"),
                },
        },
+       {
+               /* Trekstor Primebook C11 */
+               .driver_data = (void *)&trekstor_primebook_c11_data,
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "TREKSTOR"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Primebook C11"),
+               },
+       },
        {
                /* Trekstor Primebook C13 */
                .driver_data = (void *)&trekstor_primebook_c13_data,
@@ -648,6 +731,14 @@ static const struct dmi_system_id touchscreen_dmi_table[] = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "Primebook C13"),
                },
        },
+       {
+               /* Trekstor Primetab T13B */
+               .driver_data = (void *)&trekstor_primetab_t13b_data,
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "TREKSTOR"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Primetab T13B"),
+               },
+       },
        {
                /* TrekStor SurfTab twin 10.1 ST10432-8 */
                .driver_data = (void *)&trekstor_surftab_twin_10_1_data,
index 04791ea5d97b69f9ed9fc30fb7de46cf2370acbe..bea35be68706d733cf156b993fe0a8b3d3a675e5 100644 (file)
@@ -987,19 +987,19 @@ static struct bus_type wmi_bus_type = {
        .remove = wmi_dev_remove,
 };
 
-static struct device_type wmi_type_event = {
+static const struct device_type wmi_type_event = {
        .name = "event",
        .groups = wmi_event_groups,
        .release = wmi_dev_release,
 };
 
-static struct device_type wmi_type_method = {
+static const struct device_type wmi_type_method = {
        .name = "method",
        .groups = wmi_method_groups,
        .release = wmi_dev_release,
 };
 
-static struct device_type wmi_type_data = {
+static const struct device_type wmi_type_data = {
        .name = "data",
        .groups = wmi_data_groups,
        .release = wmi_dev_release,
index 504d252716f2e10db4914c8de651dff05106b82c..27e5dd47a01f9564fdff4c172be76b9694223e7e 100644 (file)
@@ -447,10 +447,9 @@ config PWM_TEGRA
 
 config  PWM_TIECAP
        tristate "ECAP PWM support"
-       depends on ARCH_OMAP2PLUS || ARCH_DAVINCI_DA8XX || ARCH_KEYSTONE
+       depends on ARCH_OMAP2PLUS || ARCH_DAVINCI_DA8XX || ARCH_KEYSTONE || ARCH_K3
        help
-         PWM driver support for the ECAP APWM controller found on AM33XX
-         TI SOC
+         PWM driver support for the ECAP APWM controller found on TI SOCs
 
          To compile this driver as a module, choose M here: the module
          will be called pwm-tiecap.
index 5561b9e190f84a63513ff3b86ecbeef7461404e8..757230e1f575e618199d03be157f0b1bf864d243 100644 (file)
@@ -30,6 +30,7 @@ static const struct pwm_lpss_boardinfo pwm_lpss_bsw_info = {
        .clk_rate = 19200000,
        .npwm = 1,
        .base_unit_bits = 16,
+       .other_devices_aml_touches_pwm_regs = true,
 };
 
 /* Broxton */
@@ -60,6 +61,7 @@ static int pwm_lpss_probe_platform(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, lpwm);
 
+       dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_SMART_PREPARE);
        pm_runtime_set_active(&pdev->dev);
        pm_runtime_enable(&pdev->dev);
 
@@ -74,13 +76,29 @@ static int pwm_lpss_remove_platform(struct platform_device *pdev)
        return pwm_lpss_remove(lpwm);
 }
 
-static SIMPLE_DEV_PM_OPS(pwm_lpss_platform_pm_ops,
-                        pwm_lpss_suspend,
-                        pwm_lpss_resume);
+static int pwm_lpss_prepare(struct device *dev)
+{
+       struct pwm_lpss_chip *lpwm = dev_get_drvdata(dev);
+
+       /*
+        * If another device's AML code touches the PWM regs on suspend/resume,
+        * force runtime-resume the PWM controller to allow this.
+        */
+       if (lpwm->info->other_devices_aml_touches_pwm_regs)
+               return 0; /* Force runtime-resume */
+
+       return 1; /* If runtime-suspended leave as is */
+}
+
+static const struct dev_pm_ops pwm_lpss_platform_pm_ops = {
+       .prepare = pwm_lpss_prepare,
+       SET_SYSTEM_SLEEP_PM_OPS(pwm_lpss_suspend, pwm_lpss_resume)
+};
 
 static const struct acpi_device_id pwm_lpss_acpi_match[] = {
        { "80860F09", (unsigned long)&pwm_lpss_byt_info },
        { "80862288", (unsigned long)&pwm_lpss_bsw_info },
+       { "80862289", (unsigned long)&pwm_lpss_bsw_info },
        { "80865AC8", (unsigned long)&pwm_lpss_bxt_info },
        { },
 };
index 4721a264bac2580cf8d21ee54396e0b494f1c9dc..2ac3a2aa9e53f5cab594fccdb042c1d77bcafa87 100644 (file)
 /* Size of each PWM register space if multiple */
 #define PWM_SIZE                       0x400
 
-#define MAX_PWMS                       4
-
-struct pwm_lpss_chip {
-       struct pwm_chip chip;
-       void __iomem *regs;
-       const struct pwm_lpss_boardinfo *info;
-       u32 saved_ctrl[MAX_PWMS];
-};
-
 static inline struct pwm_lpss_chip *to_lpwm(struct pwm_chip *chip)
 {
        return container_of(chip, struct pwm_lpss_chip, chip);
@@ -97,7 +88,7 @@ static void pwm_lpss_prepare(struct pwm_lpss_chip *lpwm, struct pwm_device *pwm,
        unsigned long long on_time_div;
        unsigned long c = lpwm->info->clk_rate, base_unit_range;
        unsigned long long base_unit, freq = NSEC_PER_SEC;
-       u32 ctrl;
+       u32 orig_ctrl, ctrl;
 
        do_div(freq, period_ns);
 
@@ -114,13 +105,17 @@ static void pwm_lpss_prepare(struct pwm_lpss_chip *lpwm, struct pwm_device *pwm,
        do_div(on_time_div, period_ns);
        on_time_div = 255ULL - on_time_div;
 
-       ctrl = pwm_lpss_read(pwm);
+       orig_ctrl = ctrl = pwm_lpss_read(pwm);
        ctrl &= ~PWM_ON_TIME_DIV_MASK;
        ctrl &= ~(base_unit_range << PWM_BASE_UNIT_SHIFT);
        base_unit &= base_unit_range;
        ctrl |= (u32) base_unit << PWM_BASE_UNIT_SHIFT;
        ctrl |= on_time_div;
-       pwm_lpss_write(pwm, ctrl);
+
+       if (orig_ctrl != ctrl) {
+               pwm_lpss_write(pwm, ctrl);
+               pwm_lpss_write(pwm, ctrl | PWM_SW_UPDATE);
+       }
 }
 
 static inline void pwm_lpss_cond_enable(struct pwm_device *pwm, bool cond)
@@ -144,7 +139,6 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm,
                                return ret;
                        }
                        pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period);
-                       pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE);
                        pwm_lpss_cond_enable(pwm, lpwm->info->bypass == false);
                        ret = pwm_lpss_wait_for_update(pwm);
                        if (ret) {
@@ -157,7 +151,6 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm,
                        if (ret)
                                return ret;
                        pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period);
-                       pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE);
                        return pwm_lpss_wait_for_update(pwm);
                }
        } else if (pwm_is_enabled(pwm)) {
@@ -168,8 +161,42 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm,
        return 0;
 }
 
+/* This function gets called once from pwmchip_add to get the initial state */
+static void pwm_lpss_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+                              struct pwm_state *state)
+{
+       struct pwm_lpss_chip *lpwm = to_lpwm(chip);
+       unsigned long base_unit_range;
+       unsigned long long base_unit, freq, on_time_div;
+       u32 ctrl;
+
+       base_unit_range = BIT(lpwm->info->base_unit_bits);
+
+       ctrl = pwm_lpss_read(pwm);
+       on_time_div = 255 - (ctrl & PWM_ON_TIME_DIV_MASK);
+       base_unit = (ctrl >> PWM_BASE_UNIT_SHIFT) & (base_unit_range - 1);
+
+       freq = base_unit * lpwm->info->clk_rate;
+       do_div(freq, base_unit_range);
+       if (freq == 0)
+               state->period = NSEC_PER_SEC;
+       else
+               state->period = NSEC_PER_SEC / (unsigned long)freq;
+
+       on_time_div *= state->period;
+       do_div(on_time_div, 255);
+       state->duty_cycle = on_time_div;
+
+       state->polarity = PWM_POLARITY_NORMAL;
+       state->enabled = !!(ctrl & PWM_ENABLE);
+
+       if (state->enabled)
+               pm_runtime_get(chip->dev);
+}
+
 static const struct pwm_ops pwm_lpss_ops = {
        .apply = pwm_lpss_apply,
+       .get_state = pwm_lpss_get_state,
        .owner = THIS_MODULE,
 };
 
@@ -214,6 +241,12 @@ EXPORT_SYMBOL_GPL(pwm_lpss_probe);
 
 int pwm_lpss_remove(struct pwm_lpss_chip *lpwm)
 {
+       int i;
+
+       for (i = 0; i < lpwm->info->npwm; i++) {
+               if (pwm_is_enabled(&lpwm->chip.pwms[i]))
+                       pm_runtime_put(lpwm->chip.dev);
+       }
        return pwmchip_remove(&lpwm->chip);
 }
 EXPORT_SYMBOL_GPL(pwm_lpss_remove);
index 7a4238ad1fcb1f25390032019170759c6666ae83..3236be835bd9c948b929cd066ae74feaff2eed73 100644 (file)
 #include <linux/device.h>
 #include <linux/pwm.h>
 
-struct pwm_lpss_chip;
+#define MAX_PWMS                       4
+
+struct pwm_lpss_chip {
+       struct pwm_chip chip;
+       void __iomem *regs;
+       const struct pwm_lpss_boardinfo *info;
+       u32 saved_ctrl[MAX_PWMS];
+};
 
 struct pwm_lpss_boardinfo {
        unsigned long clk_rate;
        unsigned int npwm;
        unsigned long base_unit_bits;
        bool bypass;
+       /*
+        * On some devices the _PS0/_PS3 AML code of the GPU (GFX0) device
+        * messes with the PWM0 controllers state,
+        */
+       bool other_devices_aml_touches_pwm_regs;
 };
 
 struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r,
index 748f614d53755daabdd9f1529d7ba9cd602cb611..a41812fc6f95733a2568eb0d6b5c7f44fe5a1741 100644 (file)
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * R-Car PWM Timer driver
  *
  * Copyright (C) 2015 Renesas Electronics Corporation
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
  */
 
 #include <linux/clk.h>
index 29267d12fb4c9d3cf9283c38b114b00430704b09..4a855a21b782dea30c8b6e555a76132a3e306deb 100644 (file)
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * R-Mobile TPU PWM driver
  *
  * Copyright (C) 2012 Renesas Solutions Corp.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/clk.h>
index f8ebbece57b71ad5f03501304b8144b1341cbe7f..48c4595a0ffcecb815bf641a7ca7ae34f77475cd 100644 (file)
@@ -300,7 +300,6 @@ static const struct of_device_id tegra_pwm_of_match[] = {
        { .compatible = "nvidia,tegra186-pwm", .data = &tegra186_pwm_soc },
        { }
 };
-
 MODULE_DEVICE_TABLE(of, tegra_pwm_of_match);
 
 static const struct dev_pm_ops tegra_pwm_pm_ops = {
index 7c71cdb8a9d8f92102b5875d120b83a3364fb6d0..ceb233dd604840bd2370dbe3c0446a751f22fe6d 100644 (file)
@@ -249,6 +249,7 @@ static void pwm_export_release(struct device *child)
 static int pwm_export_child(struct device *parent, struct pwm_device *pwm)
 {
        struct pwm_export *export;
+       char *pwm_prop[2];
        int ret;
 
        if (test_and_set_bit(PWMF_EXPORTED, &pwm->flags))
@@ -263,7 +264,6 @@ static int pwm_export_child(struct device *parent, struct pwm_device *pwm)
        export->pwm = pwm;
        mutex_init(&export->lock);
 
-       export->child.class = parent->class;
        export->child.release = pwm_export_release;
        export->child.parent = parent;
        export->child.devt = MKDEV(0, 0);
@@ -277,6 +277,10 @@ static int pwm_export_child(struct device *parent, struct pwm_device *pwm)
                export = NULL;
                return ret;
        }
+       pwm_prop[0] = kasprintf(GFP_KERNEL, "EXPORT=pwm%u", pwm->hwpwm);
+       pwm_prop[1] = NULL;
+       kobject_uevent_env(&parent->kobj, KOBJ_CHANGE, pwm_prop);
+       kfree(pwm_prop[0]);
 
        return 0;
 }
@@ -289,6 +293,7 @@ static int pwm_unexport_match(struct device *child, void *data)
 static int pwm_unexport_child(struct device *parent, struct pwm_device *pwm)
 {
        struct device *child;
+       char *pwm_prop[2];
 
        if (!test_and_clear_bit(PWMF_EXPORTED, &pwm->flags))
                return -ENODEV;
@@ -297,6 +302,11 @@ static int pwm_unexport_child(struct device *parent, struct pwm_device *pwm)
        if (!child)
                return -ENODEV;
 
+       pwm_prop[0] = kasprintf(GFP_KERNEL, "UNEXPORT=pwm%u", pwm->hwpwm);
+       pwm_prop[1] = NULL;
+       kobject_uevent_env(&parent->kobj, KOBJ_CHANGE, pwm_prop);
+       kfree(pwm_prop[0]);
+
        /* for device_find_child() */
        put_device(child);
        device_unregister(child);
index 6843bc7ee9f24525789e3bd5100ee8278fd3e413..04e294d1d16d7ea68f8c9aea0d2f56bc5cb6ec11 100644 (file)
@@ -87,6 +87,18 @@ struct qeth_dbf_info {
 #define SENSE_RESETTING_EVENT_BYTE 1
 #define SENSE_RESETTING_EVENT_FLAG 0x80
 
+static inline u32 qeth_get_device_id(struct ccw_device *cdev)
+{
+       struct ccw_dev_id dev_id;
+       u32 id;
+
+       ccw_device_get_id(cdev, &dev_id);
+       id = dev_id.devno;
+       id |= (u32) (dev_id.ssid << 16);
+
+       return id;
+}
+
 /*
  * Common IO related definitions
  */
@@ -97,7 +109,8 @@ struct qeth_dbf_info {
 #define CARD_RDEV_ID(card) dev_name(&card->read.ccwdev->dev)
 #define CARD_WDEV_ID(card) dev_name(&card->write.ccwdev->dev)
 #define CARD_DDEV_ID(card) dev_name(&card->data.ccwdev->dev)
-#define CHANNEL_ID(channel) dev_name(&channel->ccwdev->dev)
+#define CCW_DEVID(cdev)                (qeth_get_device_id(cdev))
+#define CARD_DEVID(card)       (CCW_DEVID(CARD_RDEV(card)))
 
 /**
  * card stuff
@@ -830,6 +843,11 @@ struct qeth_trap_id {
 /*some helper functions*/
 #define QETH_CARD_IFNAME(card) (((card)->dev)? (card)->dev->name : "")
 
+static inline bool qeth_netdev_is_registered(struct net_device *dev)
+{
+       return dev->netdev_ops != NULL;
+}
+
 static inline void qeth_scrub_qdio_buffer(struct qdio_buffer *buf,
                                          unsigned int elements)
 {
@@ -973,7 +991,7 @@ int qeth_wait_for_threads(struct qeth_card *, unsigned long);
 int qeth_do_run_thread(struct qeth_card *, unsigned long);
 void qeth_clear_thread_start_bit(struct qeth_card *, unsigned long);
 void qeth_clear_thread_running_bit(struct qeth_card *, unsigned long);
-int qeth_core_hardsetup_card(struct qeth_card *);
+int qeth_core_hardsetup_card(struct qeth_card *card, bool *carrier_ok);
 void qeth_print_status_message(struct qeth_card *);
 int qeth_init_qdio_queues(struct qeth_card *);
 int qeth_send_ipa_cmd(struct qeth_card *, struct qeth_cmd_buffer *,
@@ -1028,11 +1046,6 @@ int qeth_configure_cq(struct qeth_card *, enum qeth_cq);
 int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action);
 void qeth_trace_features(struct qeth_card *);
 void qeth_close_dev(struct qeth_card *);
-int qeth_send_setassparms(struct qeth_card *, struct qeth_cmd_buffer *, __u16,
-                         long,
-                         int (*reply_cb)(struct qeth_card *,
-                                         struct qeth_reply *, unsigned long),
-                         void *);
 int qeth_setassparms_cb(struct qeth_card *, struct qeth_reply *, unsigned long);
 struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *,
                                                 enum qeth_ipa_funcs,
index 3274f13aad57612967cbcfd832a2349572a0b122..4bce5ae65a55c193ec84c0d64137adde497daf6b 100644 (file)
@@ -167,6 +167,8 @@ const char *qeth_get_cardname_short(struct qeth_card *card)
                                return "OSD_1000";
                        case QETH_LINK_TYPE_10GBIT_ETH:
                                return "OSD_10GIG";
+                       case QETH_LINK_TYPE_25GBIT_ETH:
+                               return "OSD_25GIG";
                        case QETH_LINK_TYPE_LANE_ETH100:
                                return "OSD_FE_LANE";
                        case QETH_LINK_TYPE_LANE_TR:
@@ -554,8 +556,8 @@ static int __qeth_issue_next_read(struct qeth_card *card)
        if (!iob) {
                dev_warn(&card->gdev->dev, "The qeth device driver "
                        "failed to recover an error on the device\n");
-               QETH_DBF_MESSAGE(2, "%s issue_next_read failed: no iob "
-                       "available\n", dev_name(&card->gdev->dev));
+               QETH_DBF_MESSAGE(2, "issue_next_read on device %x failed: no iob available\n",
+                                CARD_DEVID(card));
                return -ENOMEM;
        }
        qeth_setup_ccw(channel->ccw, CCW_CMD_READ, QETH_BUFSIZE, iob->data);
@@ -563,8 +565,8 @@ static int __qeth_issue_next_read(struct qeth_card *card)
        rc = ccw_device_start(channel->ccwdev, channel->ccw,
                              (addr_t) iob, 0, 0);
        if (rc) {
-               QETH_DBF_MESSAGE(2, "%s error in starting next read ccw! "
-                       "rc=%i\n", dev_name(&card->gdev->dev), rc);
+               QETH_DBF_MESSAGE(2, "error %i on device %x when starting next read ccw!\n",
+                                rc, CARD_DEVID(card));
                atomic_set(&channel->irq_pending, 0);
                card->read_or_write_problem = 1;
                qeth_schedule_recovery(card);
@@ -613,16 +615,14 @@ static void qeth_issue_ipa_msg(struct qeth_ipa_cmd *cmd, int rc,
        const char *ipa_name;
        int com = cmd->hdr.command;
        ipa_name = qeth_get_ipa_cmd_name(com);
+
        if (rc)
-               QETH_DBF_MESSAGE(2, "IPA: %s(x%X) for %s/%s returned "
-                               "x%X \"%s\"\n",
-                               ipa_name, com, dev_name(&card->gdev->dev),
-                               QETH_CARD_IFNAME(card), rc,
-                               qeth_get_ipa_msg(rc));
+               QETH_DBF_MESSAGE(2, "IPA: %s(%#x) for device %x returned %#x \"%s\"\n",
+                                ipa_name, com, CARD_DEVID(card), rc,
+                                qeth_get_ipa_msg(rc));
        else
-               QETH_DBF_MESSAGE(5, "IPA: %s(x%X) for %s/%s succeeded\n",
-                               ipa_name, com, dev_name(&card->gdev->dev),
-                               QETH_CARD_IFNAME(card));
+               QETH_DBF_MESSAGE(5, "IPA: %s(%#x) for device %x succeeded\n",
+                                ipa_name, com, CARD_DEVID(card));
 }
 
 static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card,
@@ -711,7 +711,7 @@ static int qeth_check_idx_response(struct qeth_card *card,
 
        QETH_DBF_HEX(CTRL, 2, buffer, QETH_DBF_CTRL_LEN);
        if ((buffer[2] & 0xc0) == 0xc0) {
-               QETH_DBF_MESSAGE(2, "received an IDX TERMINATE with cause code %#02x\n",
+               QETH_DBF_MESSAGE(2, "received an IDX TERMINATE with cause code %#04x\n",
                                 buffer[4]);
                QETH_CARD_TEXT(card, 2, "ckidxres");
                QETH_CARD_TEXT(card, 2, " idxterm");
@@ -972,8 +972,8 @@ static int qeth_get_problem(struct qeth_card *card, struct ccw_device *cdev,
                QETH_CARD_TEXT(card, 2, "CGENCHK");
                dev_warn(&cdev->dev, "The qeth device driver "
                        "failed to recover an error on the device\n");
-               QETH_DBF_MESSAGE(2, "%s check on device dstat=x%x, cstat=x%x\n",
-                       dev_name(&cdev->dev), dstat, cstat);
+               QETH_DBF_MESSAGE(2, "check on channel %x with dstat=%#x, cstat=%#x\n",
+                                CCW_DEVID(cdev), dstat, cstat);
                print_hex_dump(KERN_WARNING, "qeth: irb ", DUMP_PREFIX_OFFSET,
                                16, 1, irb, 64, 1);
                return 1;
@@ -1013,8 +1013,8 @@ static long qeth_check_irb_error(struct qeth_card *card,
 
        switch (PTR_ERR(irb)) {
        case -EIO:
-               QETH_DBF_MESSAGE(2, "%s i/o-error on device\n",
-                       dev_name(&cdev->dev));
+               QETH_DBF_MESSAGE(2, "i/o-error on channel %x\n",
+                                CCW_DEVID(cdev));
                QETH_CARD_TEXT(card, 2, "ckirberr");
                QETH_CARD_TEXT_(card, 2, "  rc%d", -EIO);
                break;
@@ -1031,8 +1031,8 @@ static long qeth_check_irb_error(struct qeth_card *card,
                }
                break;
        default:
-               QETH_DBF_MESSAGE(2, "%s unknown error %ld on device\n",
-                       dev_name(&cdev->dev), PTR_ERR(irb));
+               QETH_DBF_MESSAGE(2, "unknown error %ld on channel %x\n",
+                                PTR_ERR(irb), CCW_DEVID(cdev));
                QETH_CARD_TEXT(card, 2, "ckirberr");
                QETH_CARD_TEXT(card, 2, "  rc???");
        }
@@ -1114,9 +1114,9 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
                        dev_warn(&channel->ccwdev->dev,
                                "The qeth device driver failed to recover "
                                "an error on the device\n");
-                       QETH_DBF_MESSAGE(2, "%s sense data available. cstat "
-                               "0x%X dstat 0x%X\n",
-                               dev_name(&channel->ccwdev->dev), cstat, dstat);
+                       QETH_DBF_MESSAGE(2, "sense data available on channel %x: cstat %#X dstat %#X\n",
+                                        CCW_DEVID(channel->ccwdev), cstat,
+                                        dstat);
                        print_hex_dump(KERN_WARNING, "qeth: irb ",
                                DUMP_PREFIX_OFFSET, 16, 1, irb, 32, 1);
                        print_hex_dump(KERN_WARNING, "qeth: sense data ",
@@ -1890,8 +1890,8 @@ static int qeth_idx_activate_channel(struct qeth_card *card,
        if (channel->state != CH_STATE_ACTIVATING) {
                dev_warn(&channel->ccwdev->dev, "The qeth device driver"
                        " failed to recover an error on the device\n");
-               QETH_DBF_MESSAGE(2, "%s IDX activate timed out\n",
-                       dev_name(&channel->ccwdev->dev));
+               QETH_DBF_MESSAGE(2, "IDX activate timed out on channel %x\n",
+                                CCW_DEVID(channel->ccwdev));
                QETH_DBF_TEXT_(SETUP, 2, "2err%d", -ETIME);
                return -ETIME;
        }
@@ -1926,17 +1926,15 @@ static void qeth_idx_write_cb(struct qeth_card *card,
                                "The adapter is used exclusively by another "
                                "host\n");
                else
-                       QETH_DBF_MESSAGE(2, "%s IDX_ACTIVATE on write channel:"
-                               " negative reply\n",
-                               dev_name(&channel->ccwdev->dev));
+                       QETH_DBF_MESSAGE(2, "IDX_ACTIVATE on channel %x: negative reply\n",
+                                        CCW_DEVID(channel->ccwdev));
                goto out;
        }
        memcpy(&temp, QETH_IDX_ACT_FUNC_LEVEL(iob->data), 2);
        if ((temp & ~0x0100) != qeth_peer_func_level(card->info.func_level)) {
-               QETH_DBF_MESSAGE(2, "%s IDX_ACTIVATE on write channel: "
-                       "function level mismatch (sent: 0x%x, received: "
-                       "0x%x)\n", dev_name(&channel->ccwdev->dev),
-                       card->info.func_level, temp);
+               QETH_DBF_MESSAGE(2, "IDX_ACTIVATE on channel %x: function level mismatch (sent: %#x, received: %#x)\n",
+                                CCW_DEVID(channel->ccwdev),
+                                card->info.func_level, temp);
                goto out;
        }
        channel->state = CH_STATE_UP;
@@ -1973,9 +1971,8 @@ static void qeth_idx_read_cb(struct qeth_card *card,
                                "insufficient authorization\n");
                        break;
                default:
-                       QETH_DBF_MESSAGE(2, "%s IDX_ACTIVATE on read channel:"
-                               " negative reply\n",
-                               dev_name(&channel->ccwdev->dev));
+                       QETH_DBF_MESSAGE(2, "IDX_ACTIVATE on channel %x: negative reply\n",
+                                        CCW_DEVID(channel->ccwdev));
                }
                QETH_CARD_TEXT_(card, 2, "idxread%c",
                        QETH_IDX_ACT_CAUSE_CODE(iob->data));
@@ -1984,10 +1981,9 @@ static void qeth_idx_read_cb(struct qeth_card *card,
 
        memcpy(&temp, QETH_IDX_ACT_FUNC_LEVEL(iob->data), 2);
        if (temp != qeth_peer_func_level(card->info.func_level)) {
-               QETH_DBF_MESSAGE(2, "%s IDX_ACTIVATE on read channel: function "
-                       "level mismatch (sent: 0x%x, received: 0x%x)\n",
-                       dev_name(&channel->ccwdev->dev),
-                       card->info.func_level, temp);
+               QETH_DBF_MESSAGE(2, "IDX_ACTIVATE on channel %x: function level mismatch (sent: %#x, received: %#x)\n",
+                                CCW_DEVID(channel->ccwdev),
+                                card->info.func_level, temp);
                goto out;
        }
        memcpy(&card->token.issuer_rm_r,
@@ -2096,9 +2092,8 @@ int qeth_send_control_data(struct qeth_card *card, int len,
                                      (addr_t) iob, 0, 0, event_timeout);
        spin_unlock_irq(get_ccwdev_lock(channel->ccwdev));
        if (rc) {
-               QETH_DBF_MESSAGE(2, "%s qeth_send_control_data: "
-                       "ccw_device_start rc = %i\n",
-                       dev_name(&channel->ccwdev->dev), rc);
+               QETH_DBF_MESSAGE(2, "qeth_send_control_data on device %x: ccw_device_start rc = %i\n",
+                                CARD_DEVID(card), rc);
                QETH_CARD_TEXT_(card, 2, " err%d", rc);
                spin_lock_irq(&card->lock);
                list_del_init(&reply->list);
@@ -2853,8 +2848,8 @@ struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *card,
        } else {
                dev_warn(&card->gdev->dev,
                         "The qeth driver ran out of channel command buffers\n");
-               QETH_DBF_MESSAGE(1, "%s The qeth driver ran out of channel command buffers",
-                                dev_name(&card->gdev->dev));
+               QETH_DBF_MESSAGE(1, "device %x ran out of channel command buffers",
+                                CARD_DEVID(card));
        }
 
        return iob;
@@ -2989,10 +2984,9 @@ static int qeth_query_ipassists_cb(struct qeth_card *card,
                return 0;
        default:
                if (cmd->hdr.return_code) {
-                       QETH_DBF_MESSAGE(1, "%s IPA_CMD_QIPASSIST: Unhandled "
-                                               "rc=%d\n",
-                                               dev_name(&card->gdev->dev),
-                                               cmd->hdr.return_code);
+                       QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Unhandled rc=%#x\n",
+                                        CARD_DEVID(card),
+                                        cmd->hdr.return_code);
                        return 0;
                }
        }
@@ -3004,8 +2998,8 @@ static int qeth_query_ipassists_cb(struct qeth_card *card,
                card->options.ipa6.supported_funcs = cmd->hdr.ipa_supported;
                card->options.ipa6.enabled_funcs = cmd->hdr.ipa_enabled;
        } else
-               QETH_DBF_MESSAGE(1, "%s IPA_CMD_QIPASSIST: Flawed LIC detected"
-                                       "\n", dev_name(&card->gdev->dev));
+               QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Flawed LIC detected\n",
+                                CARD_DEVID(card));
        return 0;
 }
 
@@ -4297,10 +4291,9 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card,
                cmd->data.setadapterparms.hdr.return_code);
        if (cmd->data.setadapterparms.hdr.return_code !=
                                                SET_ACCESS_CTRL_RC_SUCCESS)
-               QETH_DBF_MESSAGE(3, "ERR:SET_ACCESS_CTRL(%s,%d)==%d\n",
-                               card->gdev->dev.kobj.name,
-                               access_ctrl_req->subcmd_code,
-                               cmd->data.setadapterparms.hdr.return_code);
+               QETH_DBF_MESSAGE(3, "ERR:SET_ACCESS_CTRL(%#x) on device %x: %#x\n",
+                                access_ctrl_req->subcmd_code, CARD_DEVID(card),
+                                cmd->data.setadapterparms.hdr.return_code);
        switch (cmd->data.setadapterparms.hdr.return_code) {
        case SET_ACCESS_CTRL_RC_SUCCESS:
                if (card->options.isolation == ISOLATION_MODE_NONE) {
@@ -4312,14 +4305,14 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card,
                }
                break;
        case SET_ACCESS_CTRL_RC_ALREADY_NOT_ISOLATED:
-               QETH_DBF_MESSAGE(2, "%s QDIO data connection isolation already "
-                               "deactivated\n", dev_name(&card->gdev->dev));
+               QETH_DBF_MESSAGE(2, "QDIO data connection isolation on device %x already deactivated\n",
+                                CARD_DEVID(card));
                if (fallback)
                        card->options.isolation = card->options.prev_isolation;
                break;
        case SET_ACCESS_CTRL_RC_ALREADY_ISOLATED:
-               QETH_DBF_MESSAGE(2, "%s QDIO data connection isolation already"
-                               " activated\n", dev_name(&card->gdev->dev));
+               QETH_DBF_MESSAGE(2, "QDIO data connection isolation on device %x already activated\n",
+                                CARD_DEVID(card));
                if (fallback)
                        card->options.isolation = card->options.prev_isolation;
                break;
@@ -4405,10 +4398,8 @@ int qeth_set_access_ctrl_online(struct qeth_card *card, int fallback)
                rc = qeth_setadpparms_set_access_ctrl(card,
                        card->options.isolation, fallback);
                if (rc) {
-                       QETH_DBF_MESSAGE(3,
-                               "IPA(SET_ACCESS_CTRL,%s,%d) sent failed\n",
-                               card->gdev->dev.kobj.name,
-                               rc);
+                       QETH_DBF_MESSAGE(3, "IPA(SET_ACCESS_CTRL(%d) on device %x: sent failed\n",
+                                        rc, CARD_DEVID(card));
                        rc = -EOPNOTSUPP;
                }
        } else if (card->options.isolation != ISOLATION_MODE_NONE) {
@@ -4443,7 +4434,8 @@ static int qeth_mdio_read(struct net_device *dev, int phy_id, int regnum)
                rc = BMCR_FULLDPLX;
                if ((card->info.link_type != QETH_LINK_TYPE_GBIT_ETH) &&
                    (card->info.link_type != QETH_LINK_TYPE_OSN) &&
-                   (card->info.link_type != QETH_LINK_TYPE_10GBIT_ETH))
+                   (card->info.link_type != QETH_LINK_TYPE_10GBIT_ETH) &&
+                   (card->info.link_type != QETH_LINK_TYPE_25GBIT_ETH))
                        rc |= BMCR_SPEED100;
                break;
        case MII_BMSR: /* Basic mode status register */
@@ -4634,8 +4626,8 @@ static int qeth_snmp_command(struct qeth_card *card, char __user *udata)
        rc = qeth_send_ipa_snmp_cmd(card, iob, QETH_SETADP_BASE_LEN + req_len,
                                    qeth_snmp_command_cb, (void *)&qinfo);
        if (rc)
-               QETH_DBF_MESSAGE(2, "SNMP command failed on %s: (0x%x)\n",
-                          QETH_CARD_IFNAME(card), rc);
+               QETH_DBF_MESSAGE(2, "SNMP command failed on device %x: (%#x)\n",
+                                CARD_DEVID(card), rc);
        else {
                if (copy_to_user(udata, qinfo.udata, qinfo.udata_len))
                        rc = -EFAULT;
@@ -4869,8 +4861,8 @@ static void qeth_determine_capabilities(struct qeth_card *card)
 
        rc = qeth_read_conf_data(card, (void **) &prcd, &length);
        if (rc) {
-               QETH_DBF_MESSAGE(2, "%s qeth_read_conf_data returned %i\n",
-                       dev_name(&card->gdev->dev), rc);
+               QETH_DBF_MESSAGE(2, "qeth_read_conf_data on device %x returned %i\n",
+                                CARD_DEVID(card), rc);
                QETH_DBF_TEXT_(SETUP, 2, "5err%d", rc);
                goto out_offline;
        }
@@ -5086,7 +5078,7 @@ static struct ccw_driver qeth_ccw_driver = {
        .remove = ccwgroup_remove_ccwdev,
 };
 
-int qeth_core_hardsetup_card(struct qeth_card *card)
+int qeth_core_hardsetup_card(struct qeth_card *card, bool *carrier_ok)
 {
        int retries = 3;
        int rc;
@@ -5096,8 +5088,8 @@ int qeth_core_hardsetup_card(struct qeth_card *card)
        qeth_update_from_chp_desc(card);
 retry:
        if (retries < 3)
-               QETH_DBF_MESSAGE(2, "%s Retrying to do IDX activates.\n",
-                       dev_name(&card->gdev->dev));
+               QETH_DBF_MESSAGE(2, "Retrying to do IDX activates on device %x.\n",
+                                CARD_DEVID(card));
        rc = qeth_qdio_clear_card(card, card->info.type != QETH_CARD_TYPE_IQD);
        ccw_device_set_offline(CARD_DDEV(card));
        ccw_device_set_offline(CARD_WDEV(card));
@@ -5161,13 +5153,20 @@ retriable:
                if (rc == IPA_RC_LAN_OFFLINE) {
                        dev_warn(&card->gdev->dev,
                                "The LAN is offline\n");
-                       netif_carrier_off(card->dev);
+                       *carrier_ok = false;
                } else {
                        rc = -ENODEV;
                        goto out;
                }
        } else {
-               netif_carrier_on(card->dev);
+               *carrier_ok = true;
+       }
+
+       if (qeth_netdev_is_registered(card->dev)) {
+               if (*carrier_ok)
+                       netif_carrier_on(card->dev);
+               else
+                       netif_carrier_off(card->dev);
        }
 
        card->options.ipa4.supported_funcs = 0;
@@ -5201,8 +5200,8 @@ retriable:
 out:
        dev_warn(&card->gdev->dev, "The qeth device driver failed to recover "
                "an error on the device\n");
-       QETH_DBF_MESSAGE(2, "%s Initialization in hardsetup failed! rc=%d\n",
-               dev_name(&card->gdev->dev), rc);
+       QETH_DBF_MESSAGE(2, "Initialization for device %x failed in hardsetup! rc=%d\n",
+                        CARD_DEVID(card), rc);
        return rc;
 }
 EXPORT_SYMBOL_GPL(qeth_core_hardsetup_card);
@@ -5481,11 +5480,12 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *card,
 }
 EXPORT_SYMBOL_GPL(qeth_get_setassparms_cmd);
 
-int qeth_send_setassparms(struct qeth_card *card,
-                         struct qeth_cmd_buffer *iob, __u16 len, long data,
-                         int (*reply_cb)(struct qeth_card *,
-                                         struct qeth_reply *, unsigned long),
-                         void *reply_param)
+static int qeth_send_setassparms(struct qeth_card *card,
+                                struct qeth_cmd_buffer *iob, u16 len,
+                                long data, int (*reply_cb)(struct qeth_card *,
+                                                           struct qeth_reply *,
+                                                           unsigned long),
+                                void *reply_param)
 {
        int rc;
        struct qeth_ipa_cmd *cmd;
@@ -5501,7 +5501,6 @@ int qeth_send_setassparms(struct qeth_card *card,
        rc = qeth_send_ipa_cmd(card, iob, reply_cb, reply_param);
        return rc;
 }
-EXPORT_SYMBOL_GPL(qeth_send_setassparms);
 
 int qeth_send_simple_setassparms_prot(struct qeth_card *card,
                                      enum qeth_ipa_funcs ipa_func,
@@ -6170,8 +6169,14 @@ static void qeth_set_cmd_adv_sup(struct ethtool_link_ksettings *cmd,
                WARN_ON_ONCE(1);
        }
 
-       /* fallthrough from high to low, to select all legal speeds: */
+       /* partially does fall through, to also select lower speeds */
        switch (maxspeed) {
+       case SPEED_25000:
+               ethtool_link_ksettings_add_link_mode(cmd, supported,
+                                                    25000baseSR_Full);
+               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+                                                    25000baseSR_Full);
+               break;
        case SPEED_10000:
                ethtool_link_ksettings_add_link_mode(cmd, supported,
                                                     10000baseT_Full);
@@ -6254,6 +6259,10 @@ int qeth_core_ethtool_get_link_ksettings(struct net_device *netdev,
                cmd->base.speed = SPEED_10000;
                cmd->base.port = PORT_FIBRE;
                break;
+       case QETH_LINK_TYPE_25GBIT_ETH:
+               cmd->base.speed = SPEED_25000;
+               cmd->base.port = PORT_FIBRE;
+               break;
        default:
                cmd->base.speed = SPEED_10;
                cmd->base.port = PORT_TP;
@@ -6320,6 +6329,9 @@ int qeth_core_ethtool_get_link_ksettings(struct net_device *netdev,
        case CARD_INFO_PORTS_10G:
                cmd->base.speed = SPEED_10000;
                break;
+       case CARD_INFO_PORTS_25G:
+               cmd->base.speed = SPEED_25000;
+               break;
        }
 
        return 0;
index e85090467afe0a9e05b6d9b00355713ce53b2ef6..3e54be201b279f07b09481fd9c1c746802496c1e 100644 (file)
@@ -90,6 +90,7 @@ enum qeth_link_types {
        QETH_LINK_TYPE_GBIT_ETH     = 0x03,
        QETH_LINK_TYPE_OSN          = 0x04,
        QETH_LINK_TYPE_10GBIT_ETH   = 0x10,
+       QETH_LINK_TYPE_25GBIT_ETH   = 0x12,
        QETH_LINK_TYPE_LANE_ETH100  = 0x81,
        QETH_LINK_TYPE_LANE_TR      = 0x82,
        QETH_LINK_TYPE_LANE_ETH1000 = 0x83,
@@ -347,6 +348,7 @@ enum qeth_card_info_port_speed {
        CARD_INFO_PORTS_100M            = 0x00000006,
        CARD_INFO_PORTS_1G              = 0x00000007,
        CARD_INFO_PORTS_10G             = 0x00000008,
+       CARD_INFO_PORTS_25G             = 0x0000000A,
 };
 
 /* (SET)DELIP(M) IPA stuff ***************************************************/
@@ -436,7 +438,7 @@ struct qeth_ipacmd_setassparms {
                __u32 flags_32bit;
                struct qeth_ipa_caps caps;
                struct qeth_checksum_cmd chksum;
-               struct qeth_arp_cache_entry add_arp_entry;
+               struct qeth_arp_cache_entry arp_entry;
                struct qeth_arp_query_data query_arp;
                struct qeth_tso_start_data tso;
                __u8 ip[16];
index 23aaf373f631e2283e7c84ddc43f913d876f7058..2914a1a69f8300a36c1bf0580094532cb9ccecd4 100644 (file)
@@ -146,11 +146,11 @@ static int qeth_l2_write_mac(struct qeth_card *card, u8 *mac)
        QETH_CARD_TEXT(card, 2, "L2Wmac");
        rc = qeth_l2_send_setdelmac(card, mac, cmd);
        if (rc == -EEXIST)
-               QETH_DBF_MESSAGE(2, "MAC %pM already registered on %s\n",
-                                mac, QETH_CARD_IFNAME(card));
+               QETH_DBF_MESSAGE(2, "MAC already registered on device %x\n",
+                                CARD_DEVID(card));
        else if (rc)
-               QETH_DBF_MESSAGE(2, "Failed to register MAC %pM on %s: %d\n",
-                                mac, QETH_CARD_IFNAME(card), rc);
+               QETH_DBF_MESSAGE(2, "Failed to register MAC on device %x: %d\n",
+                                CARD_DEVID(card), rc);
        return rc;
 }
 
@@ -163,8 +163,8 @@ static int qeth_l2_remove_mac(struct qeth_card *card, u8 *mac)
        QETH_CARD_TEXT(card, 2, "L2Rmac");
        rc = qeth_l2_send_setdelmac(card, mac, cmd);
        if (rc)
-               QETH_DBF_MESSAGE(2, "Failed to delete MAC %pM on %s: %d\n",
-                                mac, QETH_CARD_IFNAME(card), rc);
+               QETH_DBF_MESSAGE(2, "Failed to delete MAC on device %u: %d\n",
+                                CARD_DEVID(card), rc);
        return rc;
 }
 
@@ -260,9 +260,9 @@ static int qeth_l2_send_setdelvlan_cb(struct qeth_card *card,
 
        QETH_CARD_TEXT(card, 2, "L2sdvcb");
        if (cmd->hdr.return_code) {
-               QETH_DBF_MESSAGE(2, "Error in processing VLAN %i on %s: 0x%x.\n",
+               QETH_DBF_MESSAGE(2, "Error in processing VLAN %u on device %x: %#x.\n",
                                 cmd->data.setdelvlan.vlan_id,
-                                QETH_CARD_IFNAME(card), cmd->hdr.return_code);
+                                CARD_DEVID(card), cmd->hdr.return_code);
                QETH_CARD_TEXT_(card, 2, "L2VL%4x", cmd->hdr.command);
                QETH_CARD_TEXT_(card, 2, "err%d", cmd->hdr.return_code);
        }
@@ -455,8 +455,8 @@ static int qeth_l2_request_initial_mac(struct qeth_card *card)
                rc = qeth_vm_request_mac(card);
                if (!rc)
                        goto out;
-               QETH_DBF_MESSAGE(2, "z/VM MAC Service failed on device %s: x%x\n",
-                                CARD_BUS_ID(card), rc);
+               QETH_DBF_MESSAGE(2, "z/VM MAC Service failed on device %x: %#x\n",
+                                CARD_DEVID(card), rc);
                QETH_DBF_TEXT_(SETUP, 2, "err%04x", rc);
                /* fall back to alternative mechanism: */
        }
@@ -468,8 +468,8 @@ static int qeth_l2_request_initial_mac(struct qeth_card *card)
                rc = qeth_setadpparms_change_macaddr(card);
                if (!rc)
                        goto out;
-               QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %s: x%x\n",
-                                CARD_BUS_ID(card), rc);
+               QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %x: %#x\n",
+                                CARD_DEVID(card), rc);
                QETH_DBF_TEXT_(SETUP, 2, "1err%04x", rc);
                /* fall back once more: */
        }
@@ -826,7 +826,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev)
 
        if (cgdev->state == CCWGROUP_ONLINE)
                qeth_l2_set_offline(cgdev);
-       unregister_netdev(card->dev);
+       if (qeth_netdev_is_registered(card->dev))
+               unregister_netdev(card->dev);
 }
 
 static const struct ethtool_ops qeth_l2_ethtool_ops = {
@@ -862,11 +863,11 @@ static const struct net_device_ops qeth_l2_netdev_ops = {
        .ndo_set_features       = qeth_set_features
 };
 
-static int qeth_l2_setup_netdev(struct qeth_card *card)
+static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok)
 {
        int rc;
 
-       if (card->dev->netdev_ops)
+       if (qeth_netdev_is_registered(card->dev))
                return 0;
 
        card->dev->priv_flags |= IFF_UNICAST_FLT;
@@ -919,6 +920,9 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
        qeth_l2_request_initial_mac(card);
        netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
        rc = register_netdev(card->dev);
+       if (!rc && carrier_ok)
+               netif_carrier_on(card->dev);
+
        if (rc)
                card->dev->netdev_ops = NULL;
        return rc;
@@ -949,6 +953,7 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
        struct qeth_card *card = dev_get_drvdata(&gdev->dev);
        int rc = 0;
        enum qeth_card_states recover_flag;
+       bool carrier_ok;
 
        mutex_lock(&card->discipline_mutex);
        mutex_lock(&card->conf_mutex);
@@ -956,7 +961,7 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
        QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *));
 
        recover_flag = card->state;
-       rc = qeth_core_hardsetup_card(card);
+       rc = qeth_core_hardsetup_card(card, &carrier_ok);
        if (rc) {
                QETH_DBF_TEXT_(SETUP, 2, "2err%04x", rc);
                rc = -ENODEV;
@@ -967,7 +972,7 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
                dev_info(&card->gdev->dev,
                "The device represents a Bridge Capable Port\n");
 
-       rc = qeth_l2_setup_netdev(card);
+       rc = qeth_l2_setup_netdev(card, carrier_ok);
        if (rc)
                goto out_remove;
 
index 0b161cc1fd2e62f2251be71e939813ed58b5529c..f08b745c20073b92bd2a78da983ff2a9ade3ca77 100644 (file)
@@ -278,9 +278,6 @@ static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover)
 
        QETH_CARD_TEXT(card, 4, "clearip");
 
-       if (recover && card->options.sniffer)
-               return;
-
        spin_lock_bh(&card->ip_lock);
 
        hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) {
@@ -494,9 +491,8 @@ int qeth_l3_setrouting_v4(struct qeth_card *card)
                                  QETH_PROT_IPV4);
        if (rc) {
                card->options.route4.type = NO_ROUTER;
-               QETH_DBF_MESSAGE(2, "Error (0x%04x) while setting routing type"
-                       " on %s. Type set to 'no router'.\n", rc,
-                       QETH_CARD_IFNAME(card));
+               QETH_DBF_MESSAGE(2, "Error (%#06x) while setting routing type on device %x. Type set to 'no router'.\n",
+                                rc, CARD_DEVID(card));
        }
        return rc;
 }
@@ -518,9 +514,8 @@ int qeth_l3_setrouting_v6(struct qeth_card *card)
                                  QETH_PROT_IPV6);
        if (rc) {
                card->options.route6.type = NO_ROUTER;
-               QETH_DBF_MESSAGE(2, "Error (0x%04x) while setting routing type"
-                       " on %s. Type set to 'no router'.\n", rc,
-                       QETH_CARD_IFNAME(card));
+               QETH_DBF_MESSAGE(2, "Error (%#06x) while setting routing type on device %x. Type set to 'no router'.\n",
+                                rc, CARD_DEVID(card));
        }
        return rc;
 }
@@ -663,6 +658,8 @@ static int qeth_l3_register_addr_entry(struct qeth_card *card,
        int rc = 0;
        int cnt = 3;
 
+       if (card->options.sniffer)
+               return 0;
 
        if (addr->proto == QETH_PROT_IPV4) {
                QETH_CARD_TEXT(card, 2, "setaddr4");
@@ -697,6 +694,9 @@ static int qeth_l3_deregister_addr_entry(struct qeth_card *card,
 {
        int rc = 0;
 
+       if (card->options.sniffer)
+               return 0;
+
        if (addr->proto == QETH_PROT_IPV4) {
                QETH_CARD_TEXT(card, 2, "deladdr4");
                QETH_CARD_HEX(card, 3, &addr->u.a4.addr, sizeof(int));
@@ -1070,8 +1070,8 @@ qeth_diags_trace_cb(struct qeth_card *card, struct qeth_reply *reply,
                }
                break;
        default:
-               QETH_DBF_MESSAGE(2, "Unknown sniffer action (0x%04x) on %s\n",
-                       cmd->data.diagass.action, QETH_CARD_IFNAME(card));
+               QETH_DBF_MESSAGE(2, "Unknown sniffer action (%#06x) on device %x\n",
+                                cmd->data.diagass.action, CARD_DEVID(card));
        }
 
        return 0;
@@ -1517,32 +1517,25 @@ static void qeth_l3_set_rx_mode(struct net_device *dev)
        qeth_l3_handle_promisc_mode(card);
 }
 
-static const char *qeth_l3_arp_get_error_cause(int *rc)
+static int qeth_l3_arp_makerc(int rc)
 {
-       switch (*rc) {
-       case QETH_IPA_ARP_RC_FAILED:
-               *rc = -EIO;
-               return "operation failed";
+       switch (rc) {
+       case IPA_RC_SUCCESS:
+               return 0;
        case QETH_IPA_ARP_RC_NOTSUPP:
-               *rc = -EOPNOTSUPP;
-               return "operation not supported";
-       case QETH_IPA_ARP_RC_OUT_OF_RANGE:
-               *rc = -EINVAL;
-               return "argument out of range";
        case QETH_IPA_ARP_RC_Q_NOTSUPP:
-               *rc = -EOPNOTSUPP;
-               return "query operation not supported";
+               return -EOPNOTSUPP;
+       case QETH_IPA_ARP_RC_OUT_OF_RANGE:
+               return -EINVAL;
        case QETH_IPA_ARP_RC_Q_NO_DATA:
-               *rc = -ENOENT;
-               return "no query data available";
+               return -ENOENT;
        default:
-               return "unknown error";
+               return -EIO;
        }
 }
 
 static int qeth_l3_arp_set_no_entries(struct qeth_card *card, int no_entries)
 {
-       int tmp;
        int rc;
 
        QETH_CARD_TEXT(card, 3, "arpstnoe");
@@ -1560,13 +1553,10 @@ static int qeth_l3_arp_set_no_entries(struct qeth_card *card, int no_entries)
        rc = qeth_send_simple_setassparms(card, IPA_ARP_PROCESSING,
                                          IPA_CMD_ASS_ARP_SET_NO_ENTRIES,
                                          no_entries);
-       if (rc) {
-               tmp = rc;
-               QETH_DBF_MESSAGE(2, "Could not set number of ARP entries on "
-                       "%s: %s (0x%x/%d)\n", QETH_CARD_IFNAME(card),
-                       qeth_l3_arp_get_error_cause(&rc), tmp, tmp);
-       }
-       return rc;
+       if (rc)
+               QETH_DBF_MESSAGE(2, "Could not set number of ARP entries on device %x: %#x\n",
+                                CARD_DEVID(card), rc);
+       return qeth_l3_arp_makerc(rc);
 }
 
 static __u32 get_arp_entry_size(struct qeth_card *card,
@@ -1716,7 +1706,6 @@ static int qeth_l3_query_arp_cache_info(struct qeth_card *card,
 {
        struct qeth_cmd_buffer *iob;
        struct qeth_ipa_cmd *cmd;
-       int tmp;
        int rc;
 
        QETH_CARD_TEXT_(card, 3, "qarpipv%i", prot);
@@ -1735,15 +1724,10 @@ static int qeth_l3_query_arp_cache_info(struct qeth_card *card,
        rc = qeth_l3_send_ipa_arp_cmd(card, iob,
                           QETH_SETASS_BASE_LEN+QETH_ARP_CMD_LEN,
                           qeth_l3_arp_query_cb, (void *)qinfo);
-       if (rc) {
-               tmp = rc;
-               QETH_DBF_MESSAGE(2,
-                       "Error while querying ARP cache on %s: %s "
-                       "(0x%x/%d)\n", QETH_CARD_IFNAME(card),
-                       qeth_l3_arp_get_error_cause(&rc), tmp, tmp);
-       }
-
-       return rc;
+       if (rc)
+               QETH_DBF_MESSAGE(2, "Error while querying ARP cache on device %x: %#x\n",
+                                CARD_DEVID(card), rc);
+       return qeth_l3_arp_makerc(rc);
 }
 
 static int qeth_l3_arp_query(struct qeth_card *card, char __user *udata)
@@ -1793,15 +1777,18 @@ out:
        return rc;
 }
 
-static int qeth_l3_arp_add_entry(struct qeth_card *card,
-                               struct qeth_arp_cache_entry *entry)
+static int qeth_l3_arp_modify_entry(struct qeth_card *card,
+                                   struct qeth_arp_cache_entry *entry,
+                                   enum qeth_arp_process_subcmds arp_cmd)
 {
+       struct qeth_arp_cache_entry *cmd_entry;
        struct qeth_cmd_buffer *iob;
-       char buf[16];
-       int tmp;
        int rc;
 
-       QETH_CARD_TEXT(card, 3, "arpadent");
+       if (arp_cmd == IPA_CMD_ASS_ARP_ADD_ENTRY)
+               QETH_CARD_TEXT(card, 3, "arpadd");
+       else
+               QETH_CARD_TEXT(card, 3, "arpdel");
 
        /*
         * currently GuestLAN only supports the ARP assist function
@@ -1814,71 +1801,25 @@ static int qeth_l3_arp_add_entry(struct qeth_card *card,
                return -EOPNOTSUPP;
        }
 
-       iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING,
-                                      IPA_CMD_ASS_ARP_ADD_ENTRY,
-                                      sizeof(struct qeth_arp_cache_entry),
-                                      QETH_PROT_IPV4);
+       iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING, arp_cmd,
+                                      sizeof(*cmd_entry), QETH_PROT_IPV4);
        if (!iob)
                return -ENOMEM;
-       rc = qeth_send_setassparms(card, iob,
-                                  sizeof(struct qeth_arp_cache_entry),
-                                  (unsigned long) entry,
-                                  qeth_setassparms_cb, NULL);
-       if (rc) {
-               tmp = rc;
-               qeth_l3_ipaddr4_to_string((u8 *)entry->ipaddr, buf);
-               QETH_DBF_MESSAGE(2, "Could not add ARP entry for address %s "
-                       "on %s: %s (0x%x/%d)\n", buf, QETH_CARD_IFNAME(card),
-                       qeth_l3_arp_get_error_cause(&rc), tmp, tmp);
-       }
-       return rc;
-}
-
-static int qeth_l3_arp_remove_entry(struct qeth_card *card,
-                               struct qeth_arp_cache_entry *entry)
-{
-       struct qeth_cmd_buffer *iob;
-       char buf[16] = {0, };
-       int tmp;
-       int rc;
 
-       QETH_CARD_TEXT(card, 3, "arprment");
+       cmd_entry = &__ipa_cmd(iob)->data.setassparms.data.arp_entry;
+       ether_addr_copy(cmd_entry->macaddr, entry->macaddr);
+       memcpy(cmd_entry->ipaddr, entry->ipaddr, 4);
+       rc = qeth_send_ipa_cmd(card, iob, qeth_setassparms_cb, NULL);
+       if (rc)
+               QETH_DBF_MESSAGE(2, "Could not modify (cmd: %#x) ARP entry on device %x: %#x\n",
+                                arp_cmd, CARD_DEVID(card), rc);
 
-       /*
-        * currently GuestLAN only supports the ARP assist function
-        * IPA_CMD_ASS_ARP_QUERY_INFO, but not IPA_CMD_ASS_ARP_REMOVE_ENTRY;
-        * thus we say EOPNOTSUPP for this ARP function
-        */
-       if (card->info.guestlan)
-               return -EOPNOTSUPP;
-       if (!qeth_is_supported(card, IPA_ARP_PROCESSING)) {
-               return -EOPNOTSUPP;
-       }
-       memcpy(buf, entry, 12);
-       iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING,
-                                      IPA_CMD_ASS_ARP_REMOVE_ENTRY,
-                                      12,
-                                      QETH_PROT_IPV4);
-       if (!iob)
-               return -ENOMEM;
-       rc = qeth_send_setassparms(card, iob,
-                                  12, (unsigned long)buf,
-                                  qeth_setassparms_cb, NULL);
-       if (rc) {
-               tmp = rc;
-               memset(buf, 0, 16);
-               qeth_l3_ipaddr4_to_string((u8 *)entry->ipaddr, buf);
-               QETH_DBF_MESSAGE(2, "Could not delete ARP entry for address %s"
-                       " on %s: %s (0x%x/%d)\n", buf, QETH_CARD_IFNAME(card),
-                       qeth_l3_arp_get_error_cause(&rc), tmp, tmp);
-       }
-       return rc;
+       return qeth_l3_arp_makerc(rc);
 }
 
 static int qeth_l3_arp_flush_cache(struct qeth_card *card)
 {
        int rc;
-       int tmp;
 
        QETH_CARD_TEXT(card, 3, "arpflush");
 
@@ -1894,19 +1835,17 @@ static int qeth_l3_arp_flush_cache(struct qeth_card *card)
        }
        rc = qeth_send_simple_setassparms(card, IPA_ARP_PROCESSING,
                                          IPA_CMD_ASS_ARP_FLUSH_CACHE, 0);
-       if (rc) {
-               tmp = rc;
-               QETH_DBF_MESSAGE(2, "Could not flush ARP cache on %s: %s "
-                       "(0x%x/%d)\n", QETH_CARD_IFNAME(card),
-                       qeth_l3_arp_get_error_cause(&rc), tmp, tmp);
-       }
-       return rc;
+       if (rc)
+               QETH_DBF_MESSAGE(2, "Could not flush ARP cache on device %x: %#x\n",
+                                CARD_DEVID(card), rc);
+       return qeth_l3_arp_makerc(rc);
 }
 
 static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
        struct qeth_card *card = dev->ml_priv;
        struct qeth_arp_cache_entry arp_entry;
+       enum qeth_arp_process_subcmds arp_cmd;
        int rc = 0;
 
        switch (cmd) {
@@ -1925,27 +1864,16 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
                rc = qeth_l3_arp_query(card, rq->ifr_ifru.ifru_data);
                break;
        case SIOC_QETH_ARP_ADD_ENTRY:
-               if (!capable(CAP_NET_ADMIN)) {
-                       rc = -EPERM;
-                       break;
-               }
-               if (copy_from_user(&arp_entry, rq->ifr_ifru.ifru_data,
-                                  sizeof(struct qeth_arp_cache_entry)))
-                       rc = -EFAULT;
-               else
-                       rc = qeth_l3_arp_add_entry(card, &arp_entry);
-               break;
        case SIOC_QETH_ARP_REMOVE_ENTRY:
-               if (!capable(CAP_NET_ADMIN)) {
-                       rc = -EPERM;
-                       break;
-               }
-               if (copy_from_user(&arp_entry, rq->ifr_ifru.ifru_data,
-                                  sizeof(struct qeth_arp_cache_entry)))
-                       rc = -EFAULT;
-               else
-                       rc = qeth_l3_arp_remove_entry(card, &arp_entry);
-               break;
+               if (!capable(CAP_NET_ADMIN))
+                       return -EPERM;
+               if (copy_from_user(&arp_entry, rq->ifr_data, sizeof(arp_entry)))
+                       return -EFAULT;
+
+               arp_cmd = (cmd == SIOC_QETH_ARP_ADD_ENTRY) ?
+                               IPA_CMD_ASS_ARP_ADD_ENTRY :
+                               IPA_CMD_ASS_ARP_REMOVE_ENTRY;
+               return qeth_l3_arp_modify_entry(card, &arp_entry, arp_cmd);
        case SIOC_QETH_ARP_FLUSH_CACHE:
                if (!capable(CAP_NET_ADMIN)) {
                        rc = -EPERM;
@@ -2383,12 +2311,12 @@ static const struct net_device_ops qeth_l3_osa_netdev_ops = {
        .ndo_neigh_setup        = qeth_l3_neigh_setup,
 };
 
-static int qeth_l3_setup_netdev(struct qeth_card *card)
+static int qeth_l3_setup_netdev(struct qeth_card *card, bool carrier_ok)
 {
        unsigned int headroom;
        int rc;
 
-       if (card->dev->netdev_ops)
+       if (qeth_netdev_is_registered(card->dev))
                return 0;
 
        if (card->info.type == QETH_CARD_TYPE_OSD ||
@@ -2457,6 +2385,9 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 
        netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
        rc = register_netdev(card->dev);
+       if (!rc && carrier_ok)
+               netif_carrier_on(card->dev);
+
 out:
        if (rc)
                card->dev->netdev_ops = NULL;
@@ -2497,7 +2428,8 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev)
        if (cgdev->state == CCWGROUP_ONLINE)
                qeth_l3_set_offline(cgdev);
 
-       unregister_netdev(card->dev);
+       if (qeth_netdev_is_registered(card->dev))
+               unregister_netdev(card->dev);
        qeth_l3_clear_ip_htable(card, 0);
        qeth_l3_clear_ipato_list(card);
 }
@@ -2507,6 +2439,7 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode)
        struct qeth_card *card = dev_get_drvdata(&gdev->dev);
        int rc = 0;
        enum qeth_card_states recover_flag;
+       bool carrier_ok;
 
        mutex_lock(&card->discipline_mutex);
        mutex_lock(&card->conf_mutex);
@@ -2514,14 +2447,14 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode)
        QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *));
 
        recover_flag = card->state;
-       rc = qeth_core_hardsetup_card(card);
+       rc = qeth_core_hardsetup_card(card, &carrier_ok);
        if (rc) {
                QETH_DBF_TEXT_(SETUP, 2, "2err%04x", rc);
                rc = -ENODEV;
                goto out_remove;
        }
 
-       rc = qeth_l3_setup_netdev(card);
+       rc = qeth_l3_setup_netdev(card, carrier_ok);
        if (rc)
                goto out_remove;
 
index 05293babb03106ebc2e7bb231c57967b77050823..2d655a97b959e93523f3fdd9ad12f1ef1aea60cd 100644 (file)
@@ -143,7 +143,9 @@ static int twa_poll_status_gone(TW_Device_Extension *tw_dev, u32 flag, int secon
 static int twa_post_command_packet(TW_Device_Extension *tw_dev, int request_id, char internal);
 static int twa_reset_device_extension(TW_Device_Extension *tw_dev);
 static int twa_reset_sequence(TW_Device_Extension *tw_dev, int soft_reset);
-static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, char *cdb, int use_sg, TW_SG_Entry *sglistarg);
+static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id,
+                                  unsigned char *cdb, int use_sg,
+                                  TW_SG_Entry *sglistarg);
 static void twa_scsiop_execute_scsi_complete(TW_Device_Extension *tw_dev, int request_id);
 static char *twa_string_lookup(twa_message_type *table, unsigned int aen_code);
 
@@ -278,7 +280,7 @@ out:
 static int twa_aen_drain_queue(TW_Device_Extension *tw_dev, int no_check_reset)
 {
        int request_id = 0;
-       char cdb[TW_MAX_CDB_LEN];
+       unsigned char cdb[TW_MAX_CDB_LEN];
        TW_SG_Entry sglist[1];
        int finished = 0, count = 0;
        TW_Command_Full *full_command_packet;
@@ -423,7 +425,7 @@ static void twa_aen_queue_event(TW_Device_Extension *tw_dev, TW_Command_Apache_H
 /* This function will read the aen queue from the isr */
 static int twa_aen_read_queue(TW_Device_Extension *tw_dev, int request_id)
 {
-       char cdb[TW_MAX_CDB_LEN];
+       unsigned char cdb[TW_MAX_CDB_LEN];
        TW_SG_Entry sglist[1];
        TW_Command_Full *full_command_packet;
        int retval = 1;
@@ -1798,7 +1800,9 @@ out:
 static DEF_SCSI_QCMD(twa_scsi_queue)
 
 /* This function hands scsi cdb's to the firmware */
-static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, char *cdb, int use_sg, TW_SG_Entry *sglistarg)
+static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id,
+                                  unsigned char *cdb, int use_sg,
+                                  TW_SG_Entry *sglistarg)
 {
        TW_Command_Full *full_command_packet;
        TW_Command_Apache *command_packet;
index 266bdac7530427ea3914705446b58facf0a01014..480cf82700e9f48a225baeda0007f2e6d5f84ece 100644 (file)
@@ -287,7 +287,9 @@ static int twl_post_command_packet(TW_Device_Extension *tw_dev, int request_id)
 } /* End twl_post_command_packet() */
 
 /* This function hands scsi cdb's to the firmware */
-static int twl_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, char *cdb, int use_sg, TW_SG_Entry_ISO *sglistarg)
+static int twl_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id,
+                                  unsigned char *cdb, int use_sg,
+                                  TW_SG_Entry_ISO *sglistarg)
 {
        TW_Command_Full *full_command_packet;
        TW_Command_Apache *command_packet;
@@ -372,7 +374,7 @@ out:
 /* This function will read the aen queue from the isr */
 static int twl_aen_read_queue(TW_Device_Extension *tw_dev, int request_id)
 {
-       char cdb[TW_MAX_CDB_LEN];
+       unsigned char cdb[TW_MAX_CDB_LEN];
        TW_SG_Entry_ISO sglist[1];
        TW_Command_Full *full_command_packet;
        int retval = 1;
@@ -554,7 +556,7 @@ out:
 static int twl_aen_drain_queue(TW_Device_Extension *tw_dev, int no_check_reset)
 {
        int request_id = 0;
-       char cdb[TW_MAX_CDB_LEN];
+       unsigned char cdb[TW_MAX_CDB_LEN];
        TW_SG_Entry_ISO sglist[1];
        int finished = 0, count = 0;
        TW_Command_Full *full_command_packet;
index 70988c3812684cc8d1d573e3d75bdc30ef5d1a3f..f07444d30b216dace2eb7b7adcc59e818c95192e 100644 (file)
@@ -538,7 +538,7 @@ config SCSI_HPTIOP
 
 config SCSI_BUSLOGIC
        tristate "BusLogic SCSI support"
-       depends on (PCI || ISA || MCA) && SCSI && ISA_DMA_API && VIRT_TO_BUS
+       depends on (PCI || ISA) && SCSI && ISA_DMA_API && VIRT_TO_BUS
        ---help---
          This is support for BusLogic MultiMaster and FlashPoint SCSI Host
          Adapters. Consult the SCSI-HOWTO, available from
@@ -1175,12 +1175,12 @@ config SCSI_LPFC_DEBUG_FS
 
 config SCSI_SIM710
        tristate "Simple 53c710 SCSI support (Compaq, NCR machines)"
-       depends on (EISA || MCA) && SCSI
+       depends on EISA && SCSI
        select SCSI_SPI_ATTRS
        ---help---
          This driver is for NCR53c710 based SCSI host adapters.
 
-         It currently supports Compaq EISA cards and NCR MCA cards
+         It currently supports Compaq EISA cards.
 
 config SCSI_DC395x
        tristate "Tekram DC395(U/UW/F) and DC315(U) SCSI support"
index 4d7b0e0adbf70c735f0535f911dee1ef9cc0fe73..301b3cad15f88f6d4c9022d3e763a625a35172ff 100644 (file)
@@ -269,7 +269,7 @@ static LIST_HEAD(aha152x_host_list);
 /* DEFINES */
 
 /* For PCMCIA cards, always use AUTOCONF */
-#if defined(PCMCIA) || defined(MODULE)
+#if defined(AHA152X_PCMCIA) || defined(MODULE)
 #if !defined(AUTOCONF)
 #define AUTOCONF
 #endif
@@ -297,7 +297,7 @@ CMD_INC_RESID(struct scsi_cmnd *cmd, int inc)
 
 #define DELAY_DEFAULT 1000
 
-#if defined(PCMCIA)
+#if defined(AHA152X_PCMCIA)
 #define IRQ_MIN 0
 #define IRQ_MAX 16
 #else
@@ -328,7 +328,7 @@ MODULE_AUTHOR("Jürgen Fischer");
 MODULE_DESCRIPTION(AHA152X_REVID);
 MODULE_LICENSE("GPL");
 
-#if !defined(PCMCIA)
+#if !defined(AHA152X_PCMCIA)
 #if defined(MODULE)
 static int io[] = {0, 0};
 module_param_hw_array(io, int, ioport, NULL, 0);
@@ -391,7 +391,7 @@ static struct isapnp_device_id id_table[] = {
 MODULE_DEVICE_TABLE(isapnp, id_table);
 #endif /* ISAPNP */
 
-#endif /* !PCMCIA */
+#endif /* !AHA152X_PCMCIA */
 
 static struct scsi_host_template aha152x_driver_template;
 
@@ -863,7 +863,7 @@ void aha152x_release(struct Scsi_Host *shpnt)
        if (shpnt->irq)
                free_irq(shpnt->irq, shpnt);
 
-#if !defined(PCMCIA)
+#if !defined(AHA152X_PCMCIA)
        if (shpnt->io_port)
                release_region(shpnt->io_port, IO_RANGE);
 #endif
@@ -2924,7 +2924,7 @@ static struct scsi_host_template aha152x_driver_template = {
        .slave_alloc                    = aha152x_adjust_queue,
 };
 
-#if !defined(PCMCIA)
+#if !defined(AHA152X_PCMCIA)
 static int setup_count;
 static struct aha152x_setup setup[2];
 
@@ -3392,4 +3392,4 @@ static int __init aha152x_setup(char *str)
 __setup("aha152x=", aha152x_setup);
 #endif
 
-#endif /* !PCMCIA */
+#endif /* !AHA152X_PCMCIA */
index 3df1428df31727c1bad7b24904fa2b9cca670f82..311d23c727cef06c2a85cbd5e6b4f29967ef25ef 100644 (file)
@@ -790,12 +790,11 @@ static int mvs_task_prep(struct sas_task *task, struct mvs_info *mvi, int is_tmf
        slot->n_elem = n_elem;
        slot->slot_tag = tag;
 
-       slot->buf = dma_pool_alloc(mvi->dma_pool, GFP_ATOMIC, &slot->buf_dma);
+       slot->buf = dma_pool_zalloc(mvi->dma_pool, GFP_ATOMIC, &slot->buf_dma);
        if (!slot->buf) {
                rc = -ENOMEM;
                goto err_out_tag;
        }
-       memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
 
        tei.task = task;
        tei.hdr = &mvi->slot[tag];
@@ -1906,8 +1905,7 @@ static void mvs_work_queue(struct work_struct *work)
 
                if (phy->phy_event & PHY_PLUG_OUT) {
                        u32 tmp;
-                       struct sas_identify_frame *id;
-                       id = (struct sas_identify_frame *)phy->frame_rcvd;
+
                        tmp = MVS_CHIP_DISP->read_phy_ctl(mvi, phy_no);
                        phy->phy_event &= ~PHY_PLUG_OUT;
                        if (!(tmp & PHY_READY_MASK)) {
index dba3716511c56595967c617b139ef46abd28f23e..24b89228b2414c1ab041d916d386ae347deb572c 100644 (file)
@@ -1,3 +1,3 @@
-#define PCMCIA 1
+#define AHA152X_PCMCIA 1
 #define AHA152X_STAT 1
 #include "aha152x.c"
index b28f159fdaee79fe194be3cddc81e1842059e104..0bb9ac6ece9205b8f652fa55a7f752b780012ff5 100644 (file)
@@ -218,7 +218,7 @@ qla2x00_sysfs_write_nvram(struct file *filp, struct kobject *kobj,
 
        mutex_lock(&ha->optrom_mutex);
        if (qla2x00_chip_is_down(vha)) {
-               mutex_unlock(&vha->hw->optrom_mutex);
+               mutex_unlock(&ha->optrom_mutex);
                return -EAGAIN;
        }
 
index c72d8012fe2aabfa7d1bea57532b2d170babe169..6fe20c27acc16e54aaf02112735d2b657c61852d 100644 (file)
@@ -425,7 +425,7 @@ void qla24xx_handle_adisc_event(scsi_qla_host_t *vha, struct event_arg *ea)
        __qla24xx_handle_gpdb_event(vha, ea);
 }
 
-int qla_post_els_plogi_work(struct scsi_qla_host *vha, fc_port_t *fcport)
+static int qla_post_els_plogi_work(struct scsi_qla_host *vha, fc_port_t *fcport)
 {
        struct qla_work_evt *e;
 
@@ -680,7 +680,7 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha,
                                            fcport);
                                        break;
                                }
-                               /* drop through */
+                               /* fall through */
                        default:
                                if (fcport_is_smaller(fcport)) {
                                        /* local adapter is bigger */
@@ -1551,7 +1551,8 @@ void qla24xx_handle_relogin_event(scsi_qla_host_t *vha,
 }
 
 
-void qla_handle_els_plogi_done(scsi_qla_host_t *vha, struct event_arg *ea)
+static void qla_handle_els_plogi_done(scsi_qla_host_t *vha,
+                                     struct event_arg *ea)
 {
        ql_dbg(ql_dbg_disc, vha, 0x2118,
            "%s %d %8phC post PRLI\n",
index 86fb8b21aa71085c44f403e1ee17832f01714618..032635321ad6e75a17e3c216692bd120456edae0 100644 (file)
@@ -1195,8 +1195,8 @@ qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp,
  * @sp: SRB command to process
  * @cmd_pkt: Command type 3 IOCB
  * @tot_dsds: Total number of segments to transfer
- * @tot_prot_dsds:
- * @fw_prot_opts:
+ * @tot_prot_dsds: Total number of segments with protection information
+ * @fw_prot_opts: Protection options to be passed to firmware
  */
 inline int
 qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
index d73b04e405902b02d0ea5822b44c105760166e72..30d3090842f856d27fb32f23e051832d8eb7b500 100644 (file)
@@ -25,7 +25,7 @@ static int qla2x00_error_entry(scsi_qla_host_t *, struct rsp_que *,
 
 /**
  * qla2100_intr_handler() - Process interrupts for the ISP2100 and ISP2200.
- * @irq:
+ * @irq: interrupt number
  * @dev_id: SCSI driver HA context
  *
  * Called by system whenever the host adapter generates an interrupt.
@@ -144,7 +144,7 @@ qla2x00_check_reg16_for_disconnect(scsi_qla_host_t *vha, uint16_t reg)
 
 /**
  * qla2300_intr_handler() - Process interrupts for the ISP23xx and ISP63xx.
- * @irq:
+ * @irq: interrupt number
  * @dev_id: SCSI driver HA context
  *
  * Called by system whenever the host adapter generates an interrupt.
@@ -3109,7 +3109,7 @@ done:
 
 /**
  * qla24xx_intr_handler() - Process interrupts for the ISP23xx and ISP24xx.
- * @irq:
+ * @irq: interrupt number
  * @dev_id: SCSI driver HA context
  *
  * Called by system whenever the host adapter generates an interrupt.
index 2f3e5075ae76e8d4fccacdf3a8aee7f14de42a24..191b6b7c8747df06419b998a8fbf06541658080a 100644 (file)
@@ -3478,9 +3478,9 @@ qla8044_read_serdes_word(scsi_qla_host_t *vha, uint32_t addr, uint32_t *data)
 /**
  * qla2x00_set_serdes_params() -
  * @vha: HA context
- * @sw_em_1g:
- * @sw_em_2g:
- * @sw_em_4g:
+ * @sw_em_1g: serial link options
+ * @sw_em_2g: serial link options
+ * @sw_em_4g: serial link options
  *
  * Returns
  */
index 521a513705549a9b263566c5bd9ea7b568bf4029..60f964c53c01a76023b627666548c3b9e0203aca 100644 (file)
@@ -2212,7 +2212,7 @@ qlafx00_ioctl_iosb_entry(scsi_qla_host_t *vha, struct req_que *req,
        struct bsg_job *bsg_job;
        struct fc_bsg_reply *bsg_reply;
        struct srb_iocb *iocb_job;
-       int res;
+       int res = 0;
        struct qla_mt_iocb_rsp_fx00 fstatus;
        uint8_t *fw_sts_ptr;
 
@@ -2624,7 +2624,7 @@ qlafx00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt)
  * qlafx00_multistatus_entry() - Process Multi response queue entries.
  * @vha: SCSI driver HA context
  * @rsp: response queue
- * @pkt:
+ * @pkt: received packet
  */
 static void
 qlafx00_multistatus_entry(struct scsi_qla_host *vha,
@@ -2681,12 +2681,10 @@ qlafx00_multistatus_entry(struct scsi_qla_host *vha,
  * @vha: SCSI driver HA context
  * @rsp: response queue
  * @pkt: Entry pointer
- * @estatus:
- * @etype:
  */
 static void
 qlafx00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp,
-                   struct sts_entry_fx00 *pkt, uint8_t estatus, uint8_t etype)
+                   struct sts_entry_fx00 *pkt)
 {
        srb_t *sp;
        struct qla_hw_data *ha = vha->hw;
@@ -2695,9 +2693,6 @@ qlafx00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp,
        struct req_que *req = NULL;
        int res = DID_ERROR << 16;
 
-       ql_dbg(ql_dbg_async, vha, 0x507f,
-           "type of error status in response: 0x%x\n", estatus);
-
        req = ha->req_q_map[que];
 
        sp = qla2x00_get_sp_from_handle(vha, func, req, pkt);
@@ -2745,9 +2740,11 @@ qlafx00_process_response_queue(struct scsi_qla_host *vha,
 
                if (pkt->entry_status != 0 &&
                    pkt->entry_type != IOCTL_IOSB_TYPE_FX00) {
+                       ql_dbg(ql_dbg_async, vha, 0x507f,
+                              "type of error status in response: 0x%x\n",
+                              pkt->entry_status);
                        qlafx00_error_entry(vha, rsp,
-                           (struct sts_entry_fx00 *)pkt, pkt->entry_status,
-                           pkt->entry_type);
+                                           (struct sts_entry_fx00 *)pkt);
                        continue;
                }
 
@@ -2867,7 +2864,7 @@ qlafx00_async_event(scsi_qla_host_t *vha)
 /**
  * qlafx00x_mbx_completion() - Process mailbox command completions.
  * @vha: SCSI driver HA context
- * @mb0:
+ * @mb0: value to be written into mailbox register 0
  */
 static void
 qlafx00_mbx_completion(scsi_qla_host_t *vha, uint32_t mb0)
@@ -2893,7 +2890,7 @@ qlafx00_mbx_completion(scsi_qla_host_t *vha, uint32_t mb0)
 
 /**
  * qlafx00_intr_handler() - Process interrupts for the ISPFX00.
- * @irq:
+ * @irq: interrupt number
  * @dev_id: SCSI driver HA context
  *
  * Called by system whenever the host adapter generates an interrupt.
index 121e18b3b9f8399ee7c63095761583db2ffc1706..f2f54806f4da9dac0aa90431e3aee47abeef2550 100644 (file)
@@ -2010,7 +2010,7 @@ qla82xx_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0)
 
 /**
  * qla82xx_intr_handler() - Process interrupts for the ISP23xx and ISP63xx.
- * @irq:
+ * @irq: interrupt number
  * @dev_id: SCSI driver HA context
  *
  * Called by system whenever the host adapter generates an interrupt.
index 3a2b0282df149531789bddd8cfe4d7ec69ebfbf3..fe856b602e03198686eec132fe55ed7f30676dda 100644 (file)
@@ -3878,7 +3878,7 @@ out:
 #define PF_BITS_MASK           (0xF << 16)
 /**
  * qla8044_intr_handler() - Process interrupts for the ISP8044
- * @irq:
+ * @irq: interrupt number
  * @dev_id: SCSI driver HA context
  *
  * Called by system whenever the host adapter generates an interrupt.
index 8794e54f43a95d568e88ff4944fb0301d1699271..518f15141170e733d8acee71cf288c31db6c9261 100644 (file)
@@ -1749,7 +1749,7 @@ qla2x00_loop_reset(scsi_qla_host_t *vha)
 static void
 __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res)
 {
-       int cnt, status;
+       int cnt;
        unsigned long flags;
        srb_t *sp;
        scsi_qla_host_t *vha = qp->vha;
@@ -1799,8 +1799,8 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res)
                                        if (!sp_get(sp)) {
                                                spin_unlock_irqrestore
                                                        (qp->qp_lock_ptr, flags);
-                                               status = qla2xxx_eh_abort(
-                                                   GET_CMD_SP(sp));
+                                               qla2xxx_eh_abort(
+                                                       GET_CMD_SP(sp));
                                                spin_lock_irqsave
                                                        (qp->qp_lock_ptr, flags);
                                        }
index 4499c787165f14f63f9190800886b39374054352..2a3055c799fb613039fb73718ace4fd70b23886d 100644 (file)
@@ -2229,7 +2229,7 @@ qla2x00_erase_flash_sector(struct qla_hw_data *ha, uint32_t addr,
 
 /**
  * qla2x00_get_flash_manufacturer() - Read manufacturer ID from flash chip.
- * @ha:
+ * @ha: host adapter
  * @man_id: Flash manufacturer ID
  * @flash_id: Flash ID
  */
index 39828207bc1d223fc729252e127c43ac61e4f9f2..c4504740f0e2123ba410eada3b84bb3c80ce8fca 100644 (file)
@@ -4540,7 +4540,7 @@ static int qlt_issue_task_mgmt(struct fc_port *sess, u64 lun,
        case QLA_TGT_CLEAR_TS:
        case QLA_TGT_ABORT_TS:
                abort_cmds_for_lun(vha, lun, a->u.isp24.fcp_hdr.s_id);
-               /* drop through */
+               /* fall through */
        case QLA_TGT_CLEAR_ACA:
                h = qlt_find_qphint(vha, mcmd->unpacked_lun);
                mcmd->qpair = h->qpair;
@@ -6598,9 +6598,9 @@ static void qlt_lport_dump(struct scsi_qla_host *vha, u64 wwpn,
  * qla_tgt_lport_register - register lport with external module
  *
  * @target_lport_ptr: pointer for tcm_qla2xxx specific lport data
- * @phys_wwpn:
- * @npiv_wwpn:
- * @npiv_wwnn:
+ * @phys_wwpn: physical port WWPN
+ * @npiv_wwpn: NPIV WWPN
+ * @npiv_wwnn: NPIV WWNN
  * @callback:  lport initialization callback for tcm_qla2xxx code
  */
 int qlt_lport_register(void *target_lport_ptr, u64 phys_wwpn,
index 7c128132799e0cd355c7bae1298d5a776b6c8ec9..4c28fa938ac76a28ddfba64fbfe35e4e2054c3f6 100644 (file)
@@ -329,8 +329,8 @@ struct knav_range_ops {
 };
 
 struct knav_irq_info {
-       int     irq;
-       u32     cpu_map;
+       int             irq;
+       struct cpumask  *cpu_mask;
 };
 
 struct knav_range_info {
index 316e82e46f6cbff0500ba8409529dffc3d0eddbe..2f7fb2dcc1d66d130580b57d7574ecabbbdb1546 100644 (file)
@@ -205,18 +205,18 @@ static int knav_range_setup_acc_irq(struct knav_range_info *range,
 {
        struct knav_device *kdev = range->kdev;
        struct knav_acc_channel *acc;
-       unsigned long cpu_map;
+       struct cpumask *cpu_mask;
        int ret = 0, irq;
        u32 old, new;
 
        if (range->flags & RANGE_MULTI_QUEUE) {
                acc = range->acc;
                irq = range->irqs[0].irq;
-               cpu_map = range->irqs[0].cpu_map;
+               cpu_mask = range->irqs[0].cpu_mask;
        } else {
                acc = range->acc + queue;
                irq = range->irqs[queue].irq;
-               cpu_map = range->irqs[queue].cpu_map;
+               cpu_mask = range->irqs[queue].cpu_mask;
        }
 
        old = acc->open_mask;
@@ -239,8 +239,8 @@ static int knav_range_setup_acc_irq(struct knav_range_info *range,
                        acc->name, acc->name);
                ret = request_irq(irq, knav_acc_int_handler, 0, acc->name,
                                  range);
-               if (!ret && cpu_map) {
-                       ret = irq_set_affinity_hint(irq, to_cpumask(&cpu_map));
+               if (!ret && cpu_mask) {
+                       ret = irq_set_affinity_hint(irq, cpu_mask);
                        if (ret) {
                                dev_warn(range->kdev->dev,
                                         "Failed to set IRQ affinity\n");
index b5d5673c255cad4f9326a318c5c441ba288a7591..8b418379272da7438f495b2283140e8beba7e9a1 100644 (file)
@@ -118,19 +118,17 @@ static int knav_queue_setup_irq(struct knav_range_info *range,
                          struct knav_queue_inst *inst)
 {
        unsigned queue = inst->id - range->queue_base;
-       unsigned long cpu_map;
        int ret = 0, irq;
 
        if (range->flags & RANGE_HAS_IRQ) {
                irq = range->irqs[queue].irq;
-               cpu_map = range->irqs[queue].cpu_map;
                ret = request_irq(irq, knav_queue_int_handler, 0,
                                        inst->irq_name, inst);
                if (ret)
                        return ret;
                disable_irq(irq);
-               if (cpu_map) {
-                       ret = irq_set_affinity_hint(irq, to_cpumask(&cpu_map));
+               if (range->irqs[queue].cpu_mask) {
+                       ret = irq_set_affinity_hint(irq, range->irqs[queue].cpu_mask);
                        if (ret) {
                                dev_warn(range->kdev->dev,
                                         "Failed to set IRQ affinity\n");
@@ -1262,9 +1260,19 @@ static int knav_setup_queue_range(struct knav_device *kdev,
 
                range->num_irqs++;
 
-               if (IS_ENABLED(CONFIG_SMP) && oirq.args_count == 3)
-                       range->irqs[i].cpu_map =
-                               (oirq.args[2] & 0x0000ff00) >> 8;
+               if (IS_ENABLED(CONFIG_SMP) && oirq.args_count == 3) {
+                       unsigned long mask;
+                       int bit;
+
+                       range->irqs[i].cpu_mask = devm_kzalloc(dev,
+                                                              cpumask_size(), GFP_KERNEL);
+                       if (!range->irqs[i].cpu_mask)
+                               return -ENOMEM;
+
+                       mask = (oirq.args[2] & 0x0000ff00) >> 8;
+                       for_each_set_bit(bit, &mask, BITS_PER_LONG)
+                               cpumask_set_cpu(bit, range->irqs[i].cpu_mask);
+               }
        }
 
        range->num_irqs = min(range->num_irqs, range->num_queues);
index 1227872227dc446b70b492600f52214c256098ad..36b742932c724c8e6f80ca29cb36d0d20208a9a7 100644 (file)
@@ -1245,8 +1245,7 @@ static int iscsit_do_rx_data(
                return -1;
 
        memset(&msg, 0, sizeof(struct msghdr));
-       iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC,
-                     count->iov, count->iov_count, data);
+       iov_iter_kvec(&msg.msg_iter, READ, count->iov, count->iov_count, data);
 
        while (msg_data_left(&msg)) {
                rx_loop = sock_recvmsg(conn->sock, &msg, MSG_WAITALL);
@@ -1302,8 +1301,7 @@ int tx_data(
 
        memset(&msg, 0, sizeof(struct msghdr));
 
-       iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC,
-                     iov, iov_count, data);
+       iov_iter_kvec(&msg.msg_iter, WRITE, iov, iov_count, data);
 
        while (msg_data_left(&msg)) {
                int tx_loop = sock_sendmsg(conn->sock, &msg);
index e46ca968009c06a2958e347104168cca32c37278..4f134b0c3e29e012cc05191545cf5ac6733f9282 100644 (file)
@@ -268,7 +268,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd)
        }
        transport_kunmap_data_sg(cmd);
 
-       target_complete_cmd(cmd, GOOD);
+       target_complete_cmd_with_length(cmd, GOOD, rd_len + 4);
        return 0;
 }
 
index 16751ae55d7b6f64b2bc9551f588f8b0e5d06cb8..49b110d1b972b671b17f0e1e1dab588628155bc8 100644 (file)
@@ -303,7 +303,7 @@ fd_execute_rw_aio(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                len += sg->length;
        }
 
-       iov_iter_bvec(&iter, ITER_BVEC | is_write, bvec, sgl_nents, len);
+       iov_iter_bvec(&iter, is_write, bvec, sgl_nents, len);
 
        aio_cmd->cmd = cmd;
        aio_cmd->len = len;
@@ -353,7 +353,7 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
                len += sg->length;
        }
 
-       iov_iter_bvec(&iter, ITER_BVEC, bvec, sgl_nents, len);
+       iov_iter_bvec(&iter, READ, bvec, sgl_nents, len);
        if (is_write)
                ret = vfs_iter_write(fd, &iter, &pos, 0);
        else
@@ -490,7 +490,7 @@ fd_execute_write_same(struct se_cmd *cmd)
                len += se_dev->dev_attrib.block_size;
        }
 
-       iov_iter_bvec(&iter, ITER_BVEC, bvec, nolb, len);
+       iov_iter_bvec(&iter, READ, bvec, nolb, len);
        ret = vfs_iter_write(fd_dev->fd_file, &iter, &pos, 0);
 
        kfree(bvec);
index 4cf33e2cc7058843fd547ffc2da8a02a262dd5e8..e31e4fc31aa150767c3fc910e6348b6f4a34b7fe 100644 (file)
@@ -205,19 +205,19 @@ void transport_subsystem_check_init(void)
        if (sub_api_initialized)
                return;
 
-       ret = request_module("target_core_iblock");
+       ret = IS_ENABLED(CONFIG_TCM_IBLOCK) && request_module("target_core_iblock");
        if (ret != 0)
                pr_err("Unable to load target_core_iblock\n");
 
-       ret = request_module("target_core_file");
+       ret = IS_ENABLED(CONFIG_TCM_FILEIO) && request_module("target_core_file");
        if (ret != 0)
                pr_err("Unable to load target_core_file\n");
 
-       ret = request_module("target_core_pscsi");
+       ret = IS_ENABLED(CONFIG_TCM_PSCSI) && request_module("target_core_pscsi");
        if (ret != 0)
                pr_err("Unable to load target_core_pscsi\n");
 
-       ret = request_module("target_core_user");
+       ret = IS_ENABLED(CONFIG_TCM_USER2) && request_module("target_core_user");
        if (ret != 0)
                pr_err("Unable to load target_core_user\n");
 
index ff6ba6d86cd8bf9ba43349fde40ec2d576ffed05..cc56cb3b3ecaa222222587da30460a58b2685bd7 100644 (file)
@@ -1614,10 +1614,10 @@ static void sci_request_dma(struct uart_port *port)
                hrtimer_init(&s->rx_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
                s->rx_timer.function = rx_timer_fn;
 
+               s->chan_rx_saved = s->chan_rx = chan;
+
                if (port->type == PORT_SCIFA || port->type == PORT_SCIFB)
                        sci_submit_rx(s);
-
-               s->chan_rx_saved = s->chan_rx = chan;
        }
 }
 
@@ -3102,6 +3102,7 @@ static struct uart_driver sci_uart_driver = {
 static int sci_remove(struct platform_device *dev)
 {
        struct sci_port *port = platform_get_drvdata(dev);
+       unsigned int type = port->port.type;    /* uart_remove_... clears it */
 
        sci_ports_in_use &= ~BIT(port->port.line);
        uart_remove_one_port(&sci_uart_driver, &port->port);
@@ -3112,8 +3113,7 @@ static int sci_remove(struct platform_device *dev)
                sysfs_remove_file(&dev->dev.kobj,
                                  &dev_attr_rx_fifo_trigger.attr);
        }
-       if (port->port.type == PORT_SCIFA || port->port.type == PORT_SCIFB ||
-           port->port.type == PORT_HSCIF) {
+       if (type == PORT_SCIFA || type == PORT_SCIFB || type == PORT_HSCIF) {
                sysfs_remove_file(&dev->dev.kobj,
                                  &dev_attr_rx_fifo_timeout.attr);
        }
index 7576ceace57151a21007847f14dcf091005f09bb..f438eaa682463bffe42c27d923fe8426adef0926 100644 (file)
@@ -77,7 +77,7 @@ speed_t tty_termios_baud_rate(struct ktermios *termios)
                else
                        cbaud += 15;
        }
-       return baud_table[cbaud];
+       return cbaud >= n_baud_table ? 0 : baud_table[cbaud];
 }
 EXPORT_SYMBOL(tty_termios_baud_rate);
 
@@ -113,7 +113,7 @@ speed_t tty_termios_input_baud_rate(struct ktermios *termios)
                else
                        cbaud += 15;
        }
-       return baud_table[cbaud];
+       return cbaud >= n_baud_table ? 0 : baud_table[cbaud];
 #else  /* IBSHIFT */
        return tty_termios_baud_rate(termios);
 #endif /* IBSHIFT */
index 55370e651db31424db2330b80bd6bd4c4f9d02b8..41ec8e5010f30a544b82ca439cc5a481fe499b19 100644 (file)
@@ -1548,7 +1548,7 @@ static void csi_K(struct vc_data *vc, int vpar)
        scr_memsetw(start + offset, vc->vc_video_erase_char, 2 * count);
        vc->vc_need_wrap = 0;
        if (con_should_update(vc))
-               do_update_region(vc, (unsigned long) start, count);
+               do_update_region(vc, (unsigned long)(start + offset), count);
 }
 
 static void csi_X(struct vc_data *vc, int vpar) /* erase the following vpar positions */
index e36d6c73c4a4184c6246b14ab27b8e0ecb393be8..78118883f96c8ffdf868a4474ba569be59857865 100644 (file)
@@ -23,6 +23,16 @@ config TYPEC_UCSI
 
 if TYPEC_UCSI
 
+config UCSI_CCG
+       tristate "UCSI Interface Driver for Cypress CCGx"
+       depends on I2C
+       help
+         This driver enables UCSI support on platforms that expose a
+         Cypress CCGx Type-C controller over I2C interface.
+
+         To compile the driver as a module, choose M here: the module will be
+         called ucsi_ccg.
+
 config UCSI_ACPI
        tristate "UCSI ACPI Interface Driver"
        depends on ACPI
index 7afbea5122077b3dd0cbe217ad7c839837f499b4..2f4900b26210e245a65115ed1280bab41c19f62d 100644 (file)
@@ -8,3 +8,5 @@ typec_ucsi-y                    := ucsi.o
 typec_ucsi-$(CONFIG_TRACING)   += trace.o
 
 obj-$(CONFIG_UCSI_ACPI)                += ucsi_acpi.o
+
+obj-$(CONFIG_UCSI_CCG)         += ucsi_ccg.o
diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c
new file mode 100644 (file)
index 0000000..de8a43b
--- /dev/null
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * UCSI driver for Cypress CCGx Type-C controller
+ *
+ * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved.
+ * Author: Ajay Gupta <ajayg@nvidia.com>
+ *
+ * Some code borrowed from drivers/usb/typec/ucsi/ucsi_acpi.c
+ */
+#include <linux/acpi.h>
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+
+#include <asm/unaligned.h>
+#include "ucsi.h"
+
+struct ucsi_ccg {
+       struct device *dev;
+       struct ucsi *ucsi;
+       struct ucsi_ppm ppm;
+       struct i2c_client *client;
+};
+
+#define CCGX_RAB_INTR_REG                      0x06
+#define CCGX_RAB_UCSI_CONTROL                  0x39
+#define CCGX_RAB_UCSI_CONTROL_START            BIT(0)
+#define CCGX_RAB_UCSI_CONTROL_STOP             BIT(1)
+#define CCGX_RAB_UCSI_DATA_BLOCK(offset)       (0xf000 | ((offset) & 0xff))
+
+static int ccg_read(struct ucsi_ccg *uc, u16 rab, u8 *data, u32 len)
+{
+       struct i2c_client *client = uc->client;
+       const struct i2c_adapter_quirks *quirks = client->adapter->quirks;
+       unsigned char buf[2];
+       struct i2c_msg msgs[] = {
+               {
+                       .addr   = client->addr,
+                       .flags  = 0x0,
+                       .len    = sizeof(buf),
+                       .buf    = buf,
+               },
+               {
+                       .addr   = client->addr,
+                       .flags  = I2C_M_RD,
+                       .buf    = data,
+               },
+       };
+       u32 rlen, rem_len = len, max_read_len = len;
+       int status;
+
+       /* check any max_read_len limitation on i2c adapter */
+       if (quirks && quirks->max_read_len)
+               max_read_len = quirks->max_read_len;
+
+       while (rem_len > 0) {
+               msgs[1].buf = &data[len - rem_len];
+               rlen = min_t(u16, rem_len, max_read_len);
+               msgs[1].len = rlen;
+               put_unaligned_le16(rab, buf);
+               status = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+               if (status < 0) {
+                       dev_err(uc->dev, "i2c_transfer failed %d\n", status);
+                       return status;
+               }
+               rab += rlen;
+               rem_len -= rlen;
+       }
+
+       return 0;
+}
+
+static int ccg_write(struct ucsi_ccg *uc, u16 rab, u8 *data, u32 len)
+{
+       struct i2c_client *client = uc->client;
+       unsigned char *buf;
+       struct i2c_msg msgs[] = {
+               {
+                       .addr   = client->addr,
+                       .flags  = 0x0,
+               }
+       };
+       int status;
+
+       buf = kzalloc(len + sizeof(rab), GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       put_unaligned_le16(rab, buf);
+       memcpy(buf + sizeof(rab), data, len);
+
+       msgs[0].len = len + sizeof(rab);
+       msgs[0].buf = buf;
+
+       status = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+       if (status < 0) {
+               dev_err(uc->dev, "i2c_transfer failed %d\n", status);
+               kfree(buf);
+               return status;
+       }
+
+       kfree(buf);
+       return 0;
+}
+
+static int ucsi_ccg_init(struct ucsi_ccg *uc)
+{
+       unsigned int count = 10;
+       u8 data;
+       int status;
+
+       data = CCGX_RAB_UCSI_CONTROL_STOP;
+       status = ccg_write(uc, CCGX_RAB_UCSI_CONTROL, &data, sizeof(data));
+       if (status < 0)
+               return status;
+
+       data = CCGX_RAB_UCSI_CONTROL_START;
+       status = ccg_write(uc, CCGX_RAB_UCSI_CONTROL, &data, sizeof(data));
+       if (status < 0)
+               return status;
+
+       /*
+        * Flush CCGx RESPONSE queue by acking interrupts. Above ucsi control
+        * register write will push response which must be cleared.
+        */
+       do {
+               status = ccg_read(uc, CCGX_RAB_INTR_REG, &data, sizeof(data));
+               if (status < 0)
+                       return status;
+
+               if (!data)
+                       return 0;
+
+               status = ccg_write(uc, CCGX_RAB_INTR_REG, &data, sizeof(data));
+               if (status < 0)
+                       return status;
+
+               usleep_range(10000, 11000);
+       } while (--count);
+
+       return -ETIMEDOUT;
+}
+
+static int ucsi_ccg_send_data(struct ucsi_ccg *uc)
+{
+       u8 *ppm = (u8 *)uc->ppm.data;
+       int status;
+       u16 rab;
+
+       rab = CCGX_RAB_UCSI_DATA_BLOCK(offsetof(struct ucsi_data, message_out));
+       status = ccg_write(uc, rab, ppm +
+                          offsetof(struct ucsi_data, message_out),
+                          sizeof(uc->ppm.data->message_out));
+       if (status < 0)
+               return status;
+
+       rab = CCGX_RAB_UCSI_DATA_BLOCK(offsetof(struct ucsi_data, ctrl));
+       return ccg_write(uc, rab, ppm + offsetof(struct ucsi_data, ctrl),
+                        sizeof(uc->ppm.data->ctrl));
+}
+
+static int ucsi_ccg_recv_data(struct ucsi_ccg *uc)
+{
+       u8 *ppm = (u8 *)uc->ppm.data;
+       int status;
+       u16 rab;
+
+       rab = CCGX_RAB_UCSI_DATA_BLOCK(offsetof(struct ucsi_data, cci));
+       status = ccg_read(uc, rab, ppm + offsetof(struct ucsi_data, cci),
+                         sizeof(uc->ppm.data->cci));
+       if (status < 0)
+               return status;
+
+       rab = CCGX_RAB_UCSI_DATA_BLOCK(offsetof(struct ucsi_data, message_in));
+       return ccg_read(uc, rab, ppm + offsetof(struct ucsi_data, message_in),
+                       sizeof(uc->ppm.data->message_in));
+}
+
+static int ucsi_ccg_ack_interrupt(struct ucsi_ccg *uc)
+{
+       int status;
+       unsigned char data;
+
+       status = ccg_read(uc, CCGX_RAB_INTR_REG, &data, sizeof(data));
+       if (status < 0)
+               return status;
+
+       return ccg_write(uc, CCGX_RAB_INTR_REG, &data, sizeof(data));
+}
+
+static int ucsi_ccg_sync(struct ucsi_ppm *ppm)
+{
+       struct ucsi_ccg *uc = container_of(ppm, struct ucsi_ccg, ppm);
+       int status;
+
+       status = ucsi_ccg_recv_data(uc);
+       if (status < 0)
+               return status;
+
+       /* ack interrupt to allow next command to run */
+       return ucsi_ccg_ack_interrupt(uc);
+}
+
+static int ucsi_ccg_cmd(struct ucsi_ppm *ppm, struct ucsi_control *ctrl)
+{
+       struct ucsi_ccg *uc = container_of(ppm, struct ucsi_ccg, ppm);
+
+       ppm->data->ctrl.raw_cmd = ctrl->raw_cmd;
+       return ucsi_ccg_send_data(uc);
+}
+
+static irqreturn_t ccg_irq_handler(int irq, void *data)
+{
+       struct ucsi_ccg *uc = data;
+
+       ucsi_notify(uc->ucsi);
+
+       return IRQ_HANDLED;
+}
+
+static int ucsi_ccg_probe(struct i2c_client *client,
+                         const struct i2c_device_id *id)
+{
+       struct device *dev = &client->dev;
+       struct ucsi_ccg *uc;
+       int status;
+       u16 rab;
+
+       uc = devm_kzalloc(dev, sizeof(*uc), GFP_KERNEL);
+       if (!uc)
+               return -ENOMEM;
+
+       uc->ppm.data = devm_kzalloc(dev, sizeof(struct ucsi_data), GFP_KERNEL);
+       if (!uc->ppm.data)
+               return -ENOMEM;
+
+       uc->ppm.cmd = ucsi_ccg_cmd;
+       uc->ppm.sync = ucsi_ccg_sync;
+       uc->dev = dev;
+       uc->client = client;
+
+       /* reset ccg device and initialize ucsi */
+       status = ucsi_ccg_init(uc);
+       if (status < 0) {
+               dev_err(uc->dev, "ucsi_ccg_init failed - %d\n", status);
+               return status;
+       }
+
+       status = devm_request_threaded_irq(dev, client->irq, NULL,
+                                          ccg_irq_handler,
+                                          IRQF_ONESHOT | IRQF_TRIGGER_HIGH,
+                                          dev_name(dev), uc);
+       if (status < 0) {
+               dev_err(uc->dev, "request_threaded_irq failed - %d\n", status);
+               return status;
+       }
+
+       uc->ucsi = ucsi_register_ppm(dev, &uc->ppm);
+       if (IS_ERR(uc->ucsi)) {
+               dev_err(uc->dev, "ucsi_register_ppm failed\n");
+               return PTR_ERR(uc->ucsi);
+       }
+
+       rab = CCGX_RAB_UCSI_DATA_BLOCK(offsetof(struct ucsi_data, version));
+       status = ccg_read(uc, rab, (u8 *)(uc->ppm.data) +
+                         offsetof(struct ucsi_data, version),
+                         sizeof(uc->ppm.data->version));
+       if (status < 0) {
+               ucsi_unregister_ppm(uc->ucsi);
+               return status;
+       }
+
+       i2c_set_clientdata(client, uc);
+       return 0;
+}
+
+static int ucsi_ccg_remove(struct i2c_client *client)
+{
+       struct ucsi_ccg *uc = i2c_get_clientdata(client);
+
+       ucsi_unregister_ppm(uc->ucsi);
+
+       return 0;
+}
+
+static const struct i2c_device_id ucsi_ccg_device_id[] = {
+       {"ccgx-ucsi", 0},
+       {}
+};
+MODULE_DEVICE_TABLE(i2c, ucsi_ccg_device_id);
+
+static struct i2c_driver ucsi_ccg_driver = {
+       .driver = {
+               .name = "ucsi_ccg",
+       },
+       .probe = ucsi_ccg_probe,
+       .remove = ucsi_ccg_remove,
+       .id_table = ucsi_ccg_device_id,
+};
+
+module_i2c_driver(ucsi_ccg_driver);
+
+MODULE_AUTHOR("Ajay Gupta <ajayg@nvidia.com>");
+MODULE_DESCRIPTION("UCSI driver for Cypress CCGx Type-C controller");
+MODULE_LICENSE("GPL v2");
index 9756752c0681f99c2acb1aaf1213ea23e03055f9..45da3e01c7b03ae04b3c889e73d140baa2c9dc7f 100644 (file)
@@ -309,7 +309,7 @@ int usbip_recv(struct socket *sock, void *buf, int size)
        if (!sock || !buf || !size)
                return -EINVAL;
 
-       iov_iter_kvec(&msg.msg_iter, READ|ITER_KVEC, &iov, 1, size);
+       iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size);
 
        usbip_dbg_xmit("enter\n");
 
index c24bb690680b4104d6621c5b984d869f51af2cd3..50dffe83714c63f180f4ceba7dddeb8498021322 100644 (file)
@@ -203,6 +203,19 @@ struct vhost_scsi {
        int vs_events_nr; /* num of pending events, protected by vq->mutex */
 };
 
+/*
+ * Context for processing request and control queue operations.
+ */
+struct vhost_scsi_ctx {
+       int head;
+       unsigned int out, in;
+       size_t req_size, rsp_size;
+       size_t out_size, in_size;
+       u8 *target, *lunp;
+       void *req;
+       struct iov_iter out_iter;
+};
+
 static struct workqueue_struct *vhost_scsi_workqueue;
 
 /* Global spinlock to protect vhost_scsi TPG list for vhost IOCTL access */
@@ -800,24 +813,120 @@ vhost_scsi_send_bad_target(struct vhost_scsi *vs,
                pr_err("Faulted on virtio_scsi_cmd_resp\n");
 }
 
+static int
+vhost_scsi_get_desc(struct vhost_scsi *vs, struct vhost_virtqueue *vq,
+                   struct vhost_scsi_ctx *vc)
+{
+       int ret = -ENXIO;
+
+       vc->head = vhost_get_vq_desc(vq, vq->iov,
+                                    ARRAY_SIZE(vq->iov), &vc->out, &vc->in,
+                                    NULL, NULL);
+
+       pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
+                vc->head, vc->out, vc->in);
+
+       /* On error, stop handling until the next kick. */
+       if (unlikely(vc->head < 0))
+               goto done;
+
+       /* Nothing new?  Wait for eventfd to tell us they refilled. */
+       if (vc->head == vq->num) {
+               if (unlikely(vhost_enable_notify(&vs->dev, vq))) {
+                       vhost_disable_notify(&vs->dev, vq);
+                       ret = -EAGAIN;
+               }
+               goto done;
+       }
+
+       /*
+        * Get the sizes of the request and response buffers.
+        * FIXME: Not correct for BIDI operation
+        */
+       vc->out_size = iov_length(vq->iov, vc->out);
+       vc->in_size = iov_length(&vq->iov[vc->out], vc->in);
+
+       /*
+        * Copy over the virtio-scsi request header, which for a
+        * ANY_LAYOUT enabled guest may span multiple iovecs, or a
+        * single iovec may contain both the header + outgoing
+        * WRITE payloads.
+        *
+        * copy_from_iter() will advance out_iter, so that it will
+        * point at the start of the outgoing WRITE payload, if
+        * DMA_TO_DEVICE is set.
+        */
+       iov_iter_init(&vc->out_iter, WRITE, vq->iov, vc->out, vc->out_size);
+       ret = 0;
+
+done:
+       return ret;
+}
+
+static int
+vhost_scsi_chk_size(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc)
+{
+       if (unlikely(vc->in_size < vc->rsp_size)) {
+               vq_err(vq,
+                      "Response buf too small, need min %zu bytes got %zu",
+                      vc->rsp_size, vc->in_size);
+               return -EINVAL;
+       } else if (unlikely(vc->out_size < vc->req_size)) {
+               vq_err(vq,
+                      "Request buf too small, need min %zu bytes got %zu",
+                      vc->req_size, vc->out_size);
+               return -EIO;
+       }
+
+       return 0;
+}
+
+static int
+vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc,
+                  struct vhost_scsi_tpg **tpgp)
+{
+       int ret = -EIO;
+
+       if (unlikely(!copy_from_iter_full(vc->req, vc->req_size,
+                                         &vc->out_iter))) {
+               vq_err(vq, "Faulted on copy_from_iter\n");
+       } else if (unlikely(*vc->lunp != 1)) {
+               /* virtio-scsi spec requires byte 0 of the lun to be 1 */
+               vq_err(vq, "Illegal virtio-scsi lun: %u\n", *vc->lunp);
+       } else {
+               struct vhost_scsi_tpg **vs_tpg, *tpg;
+
+               vs_tpg = vq->private_data;      /* validated at handler entry */
+
+               tpg = READ_ONCE(vs_tpg[*vc->target]);
+               if (unlikely(!tpg)) {
+                       vq_err(vq, "Target 0x%x does not exist\n", *vc->target);
+               } else {
+                       if (tpgp)
+                               *tpgp = tpg;
+                       ret = 0;
+               }
+       }
+
+       return ret;
+}
+
 static void
 vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 {
        struct vhost_scsi_tpg **vs_tpg, *tpg;
        struct virtio_scsi_cmd_req v_req;
        struct virtio_scsi_cmd_req_pi v_req_pi;
+       struct vhost_scsi_ctx vc;
        struct vhost_scsi_cmd *cmd;
-       struct iov_iter out_iter, in_iter, prot_iter, data_iter;
+       struct iov_iter in_iter, prot_iter, data_iter;
        u64 tag;
        u32 exp_data_len, data_direction;
-       unsigned int out = 0, in = 0;
-       int head, ret, prot_bytes;
-       size_t req_size, rsp_size = sizeof(struct virtio_scsi_cmd_resp);
-       size_t out_size, in_size;
+       int ret, prot_bytes;
        u16 lun;
-       u8 *target, *lunp, task_attr;
+       u8 task_attr;
        bool t10_pi = vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI);
-       void *req, *cdb;
+       void *cdb;
 
        mutex_lock(&vq->mutex);
        /*
@@ -828,85 +937,47 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
        if (!vs_tpg)
                goto out;
 
+       memset(&vc, 0, sizeof(vc));
+       vc.rsp_size = sizeof(struct virtio_scsi_cmd_resp);
+
        vhost_disable_notify(&vs->dev, vq);
 
        for (;;) {
-               head = vhost_get_vq_desc(vq, vq->iov,
-                                        ARRAY_SIZE(vq->iov), &out, &in,
-                                        NULL, NULL);
-               pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
-                        head, out, in);
-               /* On error, stop handling until the next kick. */
-               if (unlikely(head < 0))
-                       break;
-               /* Nothing new?  Wait for eventfd to tell us they refilled. */
-               if (head == vq->num) {
-                       if (unlikely(vhost_enable_notify(&vs->dev, vq))) {
-                               vhost_disable_notify(&vs->dev, vq);
-                               continue;
-                       }
-                       break;
-               }
-               /*
-                * Check for a sane response buffer so we can report early
-                * errors back to the guest.
-                */
-               if (unlikely(vq->iov[out].iov_len < rsp_size)) {
-                       vq_err(vq, "Expecting at least virtio_scsi_cmd_resp"
-                               " size, got %zu bytes\n", vq->iov[out].iov_len);
-                       break;
-               }
+               ret = vhost_scsi_get_desc(vs, vq, &vc);
+               if (ret)
+                       goto err;
+
                /*
                 * Setup pointers and values based upon different virtio-scsi
                 * request header if T10_PI is enabled in KVM guest.
                 */
                if (t10_pi) {
-                       req = &v_req_pi;
-                       req_size = sizeof(v_req_pi);
-                       lunp = &v_req_pi.lun[0];
-                       target = &v_req_pi.lun[1];
+                       vc.req = &v_req_pi;
+                       vc.req_size = sizeof(v_req_pi);
+                       vc.lunp = &v_req_pi.lun[0];
+                       vc.target = &v_req_pi.lun[1];
                } else {
-                       req = &v_req;
-                       req_size = sizeof(v_req);
-                       lunp = &v_req.lun[0];
-                       target = &v_req.lun[1];
+                       vc.req = &v_req;
+                       vc.req_size = sizeof(v_req);
+                       vc.lunp = &v_req.lun[0];
+                       vc.target = &v_req.lun[1];
                }
-               /*
-                * FIXME: Not correct for BIDI operation
-                */
-               out_size = iov_length(vq->iov, out);
-               in_size = iov_length(&vq->iov[out], in);
 
                /*
-                * Copy over the virtio-scsi request header, which for a
-                * ANY_LAYOUT enabled guest may span multiple iovecs, or a
-                * single iovec may contain both the header + outgoing
-                * WRITE payloads.
-                *
-                * copy_from_iter() will advance out_iter, so that it will
-                * point at the start of the outgoing WRITE payload, if
-                * DMA_TO_DEVICE is set.
+                * Validate the sizes of the request and response
+                * buffers.  Check for a sane response buffer so we
+                * can report early errors back to the guest.
                 */
-               iov_iter_init(&out_iter, WRITE, vq->iov, out, out_size);
+               ret = vhost_scsi_chk_size(vq, &vc);
+               if (ret)
+                       goto err;
 
-               if (unlikely(!copy_from_iter_full(req, req_size, &out_iter))) {
-                       vq_err(vq, "Faulted on copy_from_iter\n");
-                       vhost_scsi_send_bad_target(vs, vq, head, out);
-                       continue;
-               }
-               /* virtio-scsi spec requires byte 0 of the lun to be 1 */
-               if (unlikely(*lunp != 1)) {
-                       vq_err(vq, "Illegal virtio-scsi lun: %u\n", *lunp);
-                       vhost_scsi_send_bad_target(vs, vq, head, out);
-                       continue;
-               }
+               ret = vhost_scsi_get_req(vq, &vc, &tpg);
+               if (ret)
+                       goto err;
+
+               ret = -EIO;     /* bad target on any error from here on */
 
-               tpg = READ_ONCE(vs_tpg[*target]);
-               if (unlikely(!tpg)) {
-                       /* Target does not exist, fail the request */
-                       vhost_scsi_send_bad_target(vs, vq, head, out);
-                       continue;
-               }
                /*
                 * Determine data_direction by calculating the total outgoing
                 * iovec sizes + incoming iovec sizes vs. virtio-scsi request +
@@ -924,17 +995,17 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
                 */
                prot_bytes = 0;
 
-               if (out_size > req_size) {
+               if (vc.out_size > vc.req_size) {
                        data_direction = DMA_TO_DEVICE;
-                       exp_data_len = out_size - req_size;
-                       data_iter = out_iter;
-               } else if (in_size > rsp_size) {
+                       exp_data_len = vc.out_size - vc.req_size;
+                       data_iter = vc.out_iter;
+               } else if (vc.in_size > vc.rsp_size) {
                        data_direction = DMA_FROM_DEVICE;
-                       exp_data_len = in_size - rsp_size;
+                       exp_data_len = vc.in_size - vc.rsp_size;
 
-                       iov_iter_init(&in_iter, READ, &vq->iov[out], in,
-                                     rsp_size + exp_data_len);
-                       iov_iter_advance(&in_iter, rsp_size);
+                       iov_iter_init(&in_iter, READ, &vq->iov[vc.out], vc.in,
+                                     vc.rsp_size + exp_data_len);
+                       iov_iter_advance(&in_iter, vc.rsp_size);
                        data_iter = in_iter;
                } else {
                        data_direction = DMA_NONE;
@@ -950,21 +1021,20 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
                                if (data_direction != DMA_TO_DEVICE) {
                                        vq_err(vq, "Received non zero pi_bytesout,"
                                                " but wrong data_direction\n");
-                                       vhost_scsi_send_bad_target(vs, vq, head, out);
-                                       continue;
+                                       goto err;
                                }
                                prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesout);
                        } else if (v_req_pi.pi_bytesin) {
                                if (data_direction != DMA_FROM_DEVICE) {
                                        vq_err(vq, "Received non zero pi_bytesin,"
                                                " but wrong data_direction\n");
-                                       vhost_scsi_send_bad_target(vs, vq, head, out);
-                                       continue;
+                                       goto err;
                                }
                                prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesin);
                        }
                        /*
-                        * Set prot_iter to data_iter, and advance past any
+                        * Set prot_iter to data_iter and truncate it to
+                        * prot_bytes, and advance data_iter past any
                         * preceeding prot_bytes that may be present.
                         *
                         * Also fix up the exp_data_len to reflect only the
@@ -973,6 +1043,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
                        if (prot_bytes) {
                                exp_data_len -= prot_bytes;
                                prot_iter = data_iter;
+                               iov_iter_truncate(&prot_iter, prot_bytes);
                                iov_iter_advance(&data_iter, prot_bytes);
                        }
                        tag = vhost64_to_cpu(vq, v_req_pi.tag);
@@ -996,8 +1067,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
                        vq_err(vq, "Received SCSI CDB with command_size: %d that"
                                " exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n",
                                scsi_command_size(cdb), VHOST_SCSI_MAX_CDB_SIZE);
-                       vhost_scsi_send_bad_target(vs, vq, head, out);
-                       continue;
+                       goto err;
                }
                cmd = vhost_scsi_get_tag(vq, tpg, cdb, tag, lun, task_attr,
                                         exp_data_len + prot_bytes,
@@ -1005,13 +1075,12 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
                if (IS_ERR(cmd)) {
                        vq_err(vq, "vhost_scsi_get_tag failed %ld\n",
                               PTR_ERR(cmd));
-                       vhost_scsi_send_bad_target(vs, vq, head, out);
-                       continue;
+                       goto err;
                }
                cmd->tvc_vhost = vs;
                cmd->tvc_vq = vq;
-               cmd->tvc_resp_iov = vq->iov[out];
-               cmd->tvc_in_iovs = in;
+               cmd->tvc_resp_iov = vq->iov[vc.out];
+               cmd->tvc_in_iovs = vc.in;
 
                pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n",
                         cmd->tvc_cdb[0], cmd->tvc_lun);
@@ -1019,14 +1088,12 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
                         " %d\n", cmd, exp_data_len, prot_bytes, data_direction);
 
                if (data_direction != DMA_NONE) {
-                       ret = vhost_scsi_mapal(cmd,
-                                              prot_bytes, &prot_iter,
-                                              exp_data_len, &data_iter);
-                       if (unlikely(ret)) {
+                       if (unlikely(vhost_scsi_mapal(cmd, prot_bytes,
+                                                     &prot_iter, exp_data_len,
+                                                     &data_iter))) {
                                vq_err(vq, "Failed to map iov to sgl\n");
                                vhost_scsi_release_cmd(&cmd->tvc_se_cmd);
-                               vhost_scsi_send_bad_target(vs, vq, head, out);
-                               continue;
+                               goto err;
                        }
                }
                /*
@@ -1034,7 +1101,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
                 * complete the virtio-scsi request in TCM callback context via
                 * vhost_scsi_queue_data_in() and vhost_scsi_queue_status()
                 */
-               cmd->tvc_vq_desc = head;
+               cmd->tvc_vq_desc = vc.head;
                /*
                 * Dispatch cmd descriptor for cmwq execution in process
                 * context provided by vhost_scsi_workqueue.  This also ensures
@@ -1043,6 +1110,166 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
                 */
                INIT_WORK(&cmd->work, vhost_scsi_submission_work);
                queue_work(vhost_scsi_workqueue, &cmd->work);
+               ret = 0;
+err:
+               /*
+                * ENXIO:  No more requests, or read error, wait for next kick
+                * EINVAL: Invalid response buffer, drop the request
+                * EIO:    Respond with bad target
+                * EAGAIN: Pending request
+                */
+               if (ret == -ENXIO)
+                       break;
+               else if (ret == -EIO)
+                       vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out);
+       }
+out:
+       mutex_unlock(&vq->mutex);
+}
+
+static void
+vhost_scsi_send_tmf_reject(struct vhost_scsi *vs,
+                          struct vhost_virtqueue *vq,
+                          struct vhost_scsi_ctx *vc)
+{
+       struct virtio_scsi_ctrl_tmf_resp __user *resp;
+       struct virtio_scsi_ctrl_tmf_resp rsp;
+       int ret;
+
+       pr_debug("%s\n", __func__);
+       memset(&rsp, 0, sizeof(rsp));
+       rsp.response = VIRTIO_SCSI_S_FUNCTION_REJECTED;
+       resp = vq->iov[vc->out].iov_base;
+       ret = __copy_to_user(resp, &rsp, sizeof(rsp));
+       if (!ret)
+               vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0);
+       else
+               pr_err("Faulted on virtio_scsi_ctrl_tmf_resp\n");
+}
+
+static void
+vhost_scsi_send_an_resp(struct vhost_scsi *vs,
+                       struct vhost_virtqueue *vq,
+                       struct vhost_scsi_ctx *vc)
+{
+       struct virtio_scsi_ctrl_an_resp __user *resp;
+       struct virtio_scsi_ctrl_an_resp rsp;
+       int ret;
+
+       pr_debug("%s\n", __func__);
+       memset(&rsp, 0, sizeof(rsp));   /* event_actual = 0 */
+       rsp.response = VIRTIO_SCSI_S_OK;
+       resp = vq->iov[vc->out].iov_base;
+       ret = __copy_to_user(resp, &rsp, sizeof(rsp));
+       if (!ret)
+               vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0);
+       else
+               pr_err("Faulted on virtio_scsi_ctrl_an_resp\n");
+}
+
+static void
+vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
+{
+       union {
+               __virtio32 type;
+               struct virtio_scsi_ctrl_an_req an;
+               struct virtio_scsi_ctrl_tmf_req tmf;
+       } v_req;
+       struct vhost_scsi_ctx vc;
+       size_t typ_size;
+       int ret;
+
+       mutex_lock(&vq->mutex);
+       /*
+        * We can handle the vq only after the endpoint is setup by calling the
+        * VHOST_SCSI_SET_ENDPOINT ioctl.
+        */
+       if (!vq->private_data)
+               goto out;
+
+       memset(&vc, 0, sizeof(vc));
+
+       vhost_disable_notify(&vs->dev, vq);
+
+       for (;;) {
+               ret = vhost_scsi_get_desc(vs, vq, &vc);
+               if (ret)
+                       goto err;
+
+               /*
+                * Get the request type first in order to setup
+                * other parameters dependent on the type.
+                */
+               vc.req = &v_req.type;
+               typ_size = sizeof(v_req.type);
+
+               if (unlikely(!copy_from_iter_full(vc.req, typ_size,
+                                                 &vc.out_iter))) {
+                       vq_err(vq, "Faulted on copy_from_iter tmf type\n");
+                       /*
+                        * The size of the response buffer depends on the
+                        * request type and must be validated against it.
+                        * Since the request type is not known, don't send
+                        * a response.
+                        */
+                       continue;
+               }
+
+               switch (v_req.type) {
+               case VIRTIO_SCSI_T_TMF:
+                       vc.req = &v_req.tmf;
+                       vc.req_size = sizeof(struct virtio_scsi_ctrl_tmf_req);
+                       vc.rsp_size = sizeof(struct virtio_scsi_ctrl_tmf_resp);
+                       vc.lunp = &v_req.tmf.lun[0];
+                       vc.target = &v_req.tmf.lun[1];
+                       break;
+               case VIRTIO_SCSI_T_AN_QUERY:
+               case VIRTIO_SCSI_T_AN_SUBSCRIBE:
+                       vc.req = &v_req.an;
+                       vc.req_size = sizeof(struct virtio_scsi_ctrl_an_req);
+                       vc.rsp_size = sizeof(struct virtio_scsi_ctrl_an_resp);
+                       vc.lunp = &v_req.an.lun[0];
+                       vc.target = NULL;
+                       break;
+               default:
+                       vq_err(vq, "Unknown control request %d", v_req.type);
+                       continue;
+               }
+
+               /*
+                * Validate the sizes of the request and response
+                * buffers.  Check for a sane response buffer so we
+                * can report early errors back to the guest.
+                */
+               ret = vhost_scsi_chk_size(vq, &vc);
+               if (ret)
+                       goto err;
+
+               /*
+                * Get the rest of the request now that its size is known.
+                */
+               vc.req += typ_size;
+               vc.req_size -= typ_size;
+
+               ret = vhost_scsi_get_req(vq, &vc, NULL);
+               if (ret)
+                       goto err;
+
+               if (v_req.type == VIRTIO_SCSI_T_TMF)
+                       vhost_scsi_send_tmf_reject(vs, vq, &vc);
+               else
+                       vhost_scsi_send_an_resp(vs, vq, &vc);
+err:
+               /*
+                * ENXIO:  No more requests, or read error, wait for next kick
+                * EINVAL: Invalid response buffer, drop the request
+                * EIO:    Respond with bad target
+                * EAGAIN: Pending request
+                */
+               if (ret == -ENXIO)
+                       break;
+               else if (ret == -EIO)
+                       vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out);
        }
 out:
        mutex_unlock(&vq->mutex);
@@ -1050,7 +1277,12 @@ out:
 
 static void vhost_scsi_ctl_handle_kick(struct vhost_work *work)
 {
+       struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
+                                               poll.work);
+       struct vhost_scsi *vs = container_of(vq->dev, struct vhost_scsi, dev);
+
        pr_debug("%s: The handling func for control queue.\n", __func__);
+       vhost_scsi_ctl_handle_vq(vs, vq);
 }
 
 static void
index f52008bb8df76e2760dce41a656be7407aff4738..3a5f81a66d34f0b5d19d09d16f9e07451e7958ff 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/sched/mm.h>
 #include <linux/sched/signal.h>
 #include <linux/interval_tree_generic.h>
+#include <linux/nospec.h>
 
 #include "vhost.h"
 
@@ -1387,6 +1388,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
        if (idx >= d->nvqs)
                return -ENOBUFS;
 
+       idx = array_index_nospec(idx, d->nvqs);
        vq = d->vqs[idx];
 
        mutex_lock(&vq->mutex);
index d1c1f6283729623d86ebea846a7e506653f997c0..728ecd1eea305a50b5a899de53ac873cb8bceca2 100644 (file)
 #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
 
+#define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
+                                            __GFP_NOMEMALLOC)
+/* The order of free page blocks to report to host */
+#define VIRTIO_BALLOON_FREE_PAGE_ORDER (MAX_ORDER - 1)
+/* The size of a free page block in bytes */
+#define VIRTIO_BALLOON_FREE_PAGE_SIZE \
+       (1 << (VIRTIO_BALLOON_FREE_PAGE_ORDER + PAGE_SHIFT))
+
 #ifdef CONFIG_BALLOON_COMPACTION
 static struct vfsmount *balloon_mnt;
 #endif
 
+enum virtio_balloon_vq {
+       VIRTIO_BALLOON_VQ_INFLATE,
+       VIRTIO_BALLOON_VQ_DEFLATE,
+       VIRTIO_BALLOON_VQ_STATS,
+       VIRTIO_BALLOON_VQ_FREE_PAGE,
+       VIRTIO_BALLOON_VQ_MAX
+};
+
 struct virtio_balloon {
        struct virtio_device *vdev;
-       struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
+       struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq;
+
+       /* Balloon's own wq for cpu-intensive work items */
+       struct workqueue_struct *balloon_wq;
+       /* The free page reporting work item submitted to the balloon wq */
+       struct work_struct report_free_page_work;
 
        /* The balloon servicing is delegated to a freezable workqueue. */
        struct work_struct update_balloon_stats_work;
@@ -57,6 +78,18 @@ struct virtio_balloon {
        spinlock_t stop_update_lock;
        bool stop_update;
 
+       /* The list of allocated free pages, waiting to be given back to mm */
+       struct list_head free_page_list;
+       spinlock_t free_page_list_lock;
+       /* The number of free page blocks on the above list */
+       unsigned long num_free_page_blocks;
+       /* The cmd id received from host */
+       u32 cmd_id_received;
+       /* The cmd id that is actively in use */
+       __virtio32 cmd_id_active;
+       /* Buffer to store the stop sign */
+       __virtio32 cmd_id_stop;
+
        /* Waiting for host to ack the pages we released. */
        wait_queue_head_t acked;
 
@@ -320,17 +353,6 @@ static void stats_handle_request(struct virtio_balloon *vb)
        virtqueue_kick(vq);
 }
 
-static void virtballoon_changed(struct virtio_device *vdev)
-{
-       struct virtio_balloon *vb = vdev->priv;
-       unsigned long flags;
-
-       spin_lock_irqsave(&vb->stop_update_lock, flags);
-       if (!vb->stop_update)
-               queue_work(system_freezable_wq, &vb->update_balloon_size_work);
-       spin_unlock_irqrestore(&vb->stop_update_lock, flags);
-}
-
 static inline s64 towards_target(struct virtio_balloon *vb)
 {
        s64 target;
@@ -347,6 +369,60 @@ static inline s64 towards_target(struct virtio_balloon *vb)
        return target - vb->num_pages;
 }
 
+/* Gives back @num_to_return blocks of free pages to mm. */
+static unsigned long return_free_pages_to_mm(struct virtio_balloon *vb,
+                                            unsigned long num_to_return)
+{
+       struct page *page;
+       unsigned long num_returned;
+
+       spin_lock_irq(&vb->free_page_list_lock);
+       for (num_returned = 0; num_returned < num_to_return; num_returned++) {
+               page = balloon_page_pop(&vb->free_page_list);
+               if (!page)
+                       break;
+               free_pages((unsigned long)page_address(page),
+                          VIRTIO_BALLOON_FREE_PAGE_ORDER);
+       }
+       vb->num_free_page_blocks -= num_returned;
+       spin_unlock_irq(&vb->free_page_list_lock);
+
+       return num_returned;
+}
+
+static void virtballoon_changed(struct virtio_device *vdev)
+{
+       struct virtio_balloon *vb = vdev->priv;
+       unsigned long flags;
+       s64 diff = towards_target(vb);
+
+       if (diff) {
+               spin_lock_irqsave(&vb->stop_update_lock, flags);
+               if (!vb->stop_update)
+                       queue_work(system_freezable_wq,
+                                  &vb->update_balloon_size_work);
+               spin_unlock_irqrestore(&vb->stop_update_lock, flags);
+       }
+
+       if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+               virtio_cread(vdev, struct virtio_balloon_config,
+                            free_page_report_cmd_id, &vb->cmd_id_received);
+               if (vb->cmd_id_received == VIRTIO_BALLOON_CMD_ID_DONE) {
+                       /* Pass ULONG_MAX to give back all the free pages */
+                       return_free_pages_to_mm(vb, ULONG_MAX);
+               } else if (vb->cmd_id_received != VIRTIO_BALLOON_CMD_ID_STOP &&
+                          vb->cmd_id_received !=
+                          virtio32_to_cpu(vdev, vb->cmd_id_active)) {
+                       spin_lock_irqsave(&vb->stop_update_lock, flags);
+                       if (!vb->stop_update) {
+                               queue_work(vb->balloon_wq,
+                                          &vb->report_free_page_work);
+                       }
+                       spin_unlock_irqrestore(&vb->stop_update_lock, flags);
+               }
+       }
+}
+
 static void update_balloon_size(struct virtio_balloon *vb)
 {
        u32 actual = vb->num_pages;
@@ -389,26 +465,44 @@ static void update_balloon_size_func(struct work_struct *work)
 
 static int init_vqs(struct virtio_balloon *vb)
 {
-       struct virtqueue *vqs[3];
-       vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
-       static const char * const names[] = { "inflate", "deflate", "stats" };
-       int err, nvqs;
+       struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
+       vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
+       const char *names[VIRTIO_BALLOON_VQ_MAX];
+       int err;
 
        /*
-        * We expect two virtqueues: inflate and deflate, and
-        * optionally stat.
+        * Inflateq and deflateq are used unconditionally. The names[]
+        * will be NULL if the related feature is not enabled, which will
+        * cause no allocation for the corresponding virtqueue in find_vqs.
         */
-       nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
-       err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
+       callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack;
+       names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
+       callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
+       names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
+       names[VIRTIO_BALLOON_VQ_STATS] = NULL;
+       names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
+
+       if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+               names[VIRTIO_BALLOON_VQ_STATS] = "stats";
+               callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request;
+       }
+
+       if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+               names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq";
+               callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
+       }
+
+       err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
+                                        vqs, callbacks, names, NULL, NULL);
        if (err)
                return err;
 
-       vb->inflate_vq = vqs[0];
-       vb->deflate_vq = vqs[1];
+       vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
+       vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
        if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
                struct scatterlist sg;
                unsigned int num_stats;
-               vb->stats_vq = vqs[2];
+               vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS];
 
                /*
                 * Prime this virtqueue with one buffer so the hypervisor can
@@ -426,9 +520,145 @@ static int init_vqs(struct virtio_balloon *vb)
                }
                virtqueue_kick(vb->stats_vq);
        }
+
+       if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+               vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE];
+
+       return 0;
+}
+
+static int send_cmd_id_start(struct virtio_balloon *vb)
+{
+       struct scatterlist sg;
+       struct virtqueue *vq = vb->free_page_vq;
+       int err, unused;
+
+       /* Detach all the used buffers from the vq */
+       while (virtqueue_get_buf(vq, &unused))
+               ;
+
+       vb->cmd_id_active = cpu_to_virtio32(vb->vdev, vb->cmd_id_received);
+       sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active));
+       err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_active, GFP_KERNEL);
+       if (!err)
+               virtqueue_kick(vq);
+       return err;
+}
+
+static int send_cmd_id_stop(struct virtio_balloon *vb)
+{
+       struct scatterlist sg;
+       struct virtqueue *vq = vb->free_page_vq;
+       int err, unused;
+
+       /* Detach all the used buffers from the vq */
+       while (virtqueue_get_buf(vq, &unused))
+               ;
+
+       sg_init_one(&sg, &vb->cmd_id_stop, sizeof(vb->cmd_id_stop));
+       err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_stop, GFP_KERNEL);
+       if (!err)
+               virtqueue_kick(vq);
+       return err;
+}
+
+static int get_free_page_and_send(struct virtio_balloon *vb)
+{
+       struct virtqueue *vq = vb->free_page_vq;
+       struct page *page;
+       struct scatterlist sg;
+       int err, unused;
+       void *p;
+
+       /* Detach all the used buffers from the vq */
+       while (virtqueue_get_buf(vq, &unused))
+               ;
+
+       page = alloc_pages(VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG,
+                          VIRTIO_BALLOON_FREE_PAGE_ORDER);
+       /*
+        * When the allocation returns NULL, it indicates that we have got all
+        * the possible free pages, so return -EINTR to stop.
+        */
+       if (!page)
+               return -EINTR;
+
+       p = page_address(page);
+       sg_init_one(&sg, p, VIRTIO_BALLOON_FREE_PAGE_SIZE);
+       /* There is always 1 entry reserved for the cmd id to use. */
+       if (vq->num_free > 1) {
+               err = virtqueue_add_inbuf(vq, &sg, 1, p, GFP_KERNEL);
+               if (unlikely(err)) {
+                       free_pages((unsigned long)p,
+                                  VIRTIO_BALLOON_FREE_PAGE_ORDER);
+                       return err;
+               }
+               virtqueue_kick(vq);
+               spin_lock_irq(&vb->free_page_list_lock);
+               balloon_page_push(&vb->free_page_list, page);
+               vb->num_free_page_blocks++;
+               spin_unlock_irq(&vb->free_page_list_lock);
+       } else {
+               /*
+                * The vq has no available entry to add this page block, so
+                * just free it.
+                */
+               free_pages((unsigned long)p, VIRTIO_BALLOON_FREE_PAGE_ORDER);
+       }
+
+       return 0;
+}
+
+static int send_free_pages(struct virtio_balloon *vb)
+{
+       int err;
+       u32 cmd_id_active;
+
+       while (1) {
+               /*
+                * If a stop id or a new cmd id was just received from host,
+                * stop the reporting.
+                */
+               cmd_id_active = virtio32_to_cpu(vb->vdev, vb->cmd_id_active);
+               if (cmd_id_active != vb->cmd_id_received)
+                       break;
+
+               /*
+                * The free page blocks are allocated and sent to host one by
+                * one.
+                */
+               err = get_free_page_and_send(vb);
+               if (err == -EINTR)
+                       break;
+               else if (unlikely(err))
+                       return err;
+       }
+
        return 0;
 }
 
+static void report_free_page_func(struct work_struct *work)
+{
+       int err;
+       struct virtio_balloon *vb = container_of(work, struct virtio_balloon,
+                                                report_free_page_work);
+       struct device *dev = &vb->vdev->dev;
+
+       /* Start by sending the received cmd id to host with an outbuf. */
+       err = send_cmd_id_start(vb);
+       if (unlikely(err))
+               dev_err(dev, "Failed to send a start id, err = %d\n", err);
+
+       err = send_free_pages(vb);
+       if (unlikely(err))
+               dev_err(dev, "Failed to send a free page, err = %d\n", err);
+
+       /* End by sending a stop id to host with an outbuf. */
+       err = send_cmd_id_stop(vb);
+       if (unlikely(err))
+               dev_err(dev, "Failed to send a stop id, err = %d\n", err);
+}
+
 #ifdef CONFIG_BALLOON_COMPACTION
 /*
  * virtballoon_migratepage - perform the balloon page migration on behalf of
@@ -512,14 +742,23 @@ static struct file_system_type balloon_fs = {
 
 #endif /* CONFIG_BALLOON_COMPACTION */
 
-static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
-                                                 struct shrink_control *sc)
+static unsigned long shrink_free_pages(struct virtio_balloon *vb,
+                                      unsigned long pages_to_free)
 {
-       unsigned long pages_to_free, pages_freed = 0;
-       struct virtio_balloon *vb = container_of(shrinker,
-                                       struct virtio_balloon, shrinker);
+       unsigned long blocks_to_free, blocks_freed;
 
-       pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE;
+       pages_to_free = round_up(pages_to_free,
+                                1 << VIRTIO_BALLOON_FREE_PAGE_ORDER);
+       blocks_to_free = pages_to_free >> VIRTIO_BALLOON_FREE_PAGE_ORDER;
+       blocks_freed = return_free_pages_to_mm(vb, blocks_to_free);
+
+       return blocks_freed << VIRTIO_BALLOON_FREE_PAGE_ORDER;
+}
+
+static unsigned long shrink_balloon_pages(struct virtio_balloon *vb,
+                                         unsigned long pages_to_free)
+{
+       unsigned long pages_freed = 0;
 
        /*
         * One invocation of leak_balloon can deflate at most
@@ -527,12 +766,33 @@ static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
         * multiple times to deflate pages till reaching pages_to_free.
         */
        while (vb->num_pages && pages_to_free) {
+               pages_freed += leak_balloon(vb, pages_to_free) /
+                                       VIRTIO_BALLOON_PAGES_PER_PAGE;
                pages_to_free -= pages_freed;
-               pages_freed += leak_balloon(vb, pages_to_free);
        }
        update_balloon_size(vb);
 
-       return pages_freed / VIRTIO_BALLOON_PAGES_PER_PAGE;
+       return pages_freed;
+}
+
+static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
+                                                 struct shrink_control *sc)
+{
+       unsigned long pages_to_free, pages_freed = 0;
+       struct virtio_balloon *vb = container_of(shrinker,
+                                       struct virtio_balloon, shrinker);
+
+       pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE;
+
+       if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+               pages_freed = shrink_free_pages(vb, pages_to_free);
+
+       if (pages_freed >= pages_to_free)
+               return pages_freed;
+
+       pages_freed += shrink_balloon_pages(vb, pages_to_free - pages_freed);
+
+       return pages_freed;
 }
 
 static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
@@ -540,8 +800,12 @@ static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
 {
        struct virtio_balloon *vb = container_of(shrinker,
                                        struct virtio_balloon, shrinker);
+       unsigned long count;
 
-       return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
+       count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
+       count += vb->num_free_page_blocks >> VIRTIO_BALLOON_FREE_PAGE_ORDER;
+
+       return count;
 }
 
 static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb)
@@ -561,6 +825,7 @@ static int virtio_balloon_register_shrinker(struct virtio_balloon *vb)
 static int virtballoon_probe(struct virtio_device *vdev)
 {
        struct virtio_balloon *vb;
+       __u32 poison_val;
        int err;
 
        if (!vdev->config->get) {
@@ -604,6 +869,36 @@ static int virtballoon_probe(struct virtio_device *vdev)
        }
        vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
 #endif
+       if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+               /*
+                * There is always one entry reserved for cmd id, so the ring
+                * size needs to be at least two to report free page hints.
+                */
+               if (virtqueue_get_vring_size(vb->free_page_vq) < 2) {
+                       err = -ENOSPC;
+                       goto out_del_vqs;
+               }
+               vb->balloon_wq = alloc_workqueue("balloon-wq",
+                                       WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
+               if (!vb->balloon_wq) {
+                       err = -ENOMEM;
+                       goto out_del_vqs;
+               }
+               INIT_WORK(&vb->report_free_page_work, report_free_page_func);
+               vb->cmd_id_received = VIRTIO_BALLOON_CMD_ID_STOP;
+               vb->cmd_id_active = cpu_to_virtio32(vb->vdev,
+                                                 VIRTIO_BALLOON_CMD_ID_STOP);
+               vb->cmd_id_stop = cpu_to_virtio32(vb->vdev,
+                                                 VIRTIO_BALLOON_CMD_ID_STOP);
+               vb->num_free_page_blocks = 0;
+               spin_lock_init(&vb->free_page_list_lock);
+               INIT_LIST_HEAD(&vb->free_page_list);
+               if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
+                       memset(&poison_val, PAGE_POISON, sizeof(poison_val));
+                       virtio_cwrite(vb->vdev, struct virtio_balloon_config,
+                                     poison_val, &poison_val);
+               }
+       }
        /*
         * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a
         * shrinker needs to be registered to relieve memory pressure.
@@ -611,7 +906,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
        if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
                err = virtio_balloon_register_shrinker(vb);
                if (err)
-                       goto out_del_vqs;
+                       goto out_del_balloon_wq;
        }
        virtio_device_ready(vdev);
 
@@ -619,6 +914,9 @@ static int virtballoon_probe(struct virtio_device *vdev)
                virtballoon_changed(vdev);
        return 0;
 
+out_del_balloon_wq:
+       if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+               destroy_workqueue(vb->balloon_wq);
 out_del_vqs:
        vdev->config->del_vqs(vdev);
 out_free_vb:
@@ -652,6 +950,11 @@ static void virtballoon_remove(struct virtio_device *vdev)
        cancel_work_sync(&vb->update_balloon_size_work);
        cancel_work_sync(&vb->update_balloon_stats_work);
 
+       if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+               cancel_work_sync(&vb->report_free_page_work);
+               destroy_workqueue(vb->balloon_wq);
+       }
+
        remove_common(vb);
 #ifdef CONFIG_BALLOON_COMPACTION
        if (vb->vb_dev_info.inode)
@@ -695,6 +998,9 @@ static int virtballoon_restore(struct virtio_device *vdev)
 
 static int virtballoon_validate(struct virtio_device *vdev)
 {
+       if (!page_poisoning_enabled())
+               __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON);
+
        __virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);
        return 0;
 }
@@ -703,6 +1009,8 @@ static unsigned int features[] = {
        VIRTIO_BALLOON_F_MUST_TELL_HOST,
        VIRTIO_BALLOON_F_STATS_VQ,
        VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
+       VIRTIO_BALLOON_F_FREE_PAGE_HINT,
+       VIRTIO_BALLOON_F_PAGE_POISON,
 };
 
 static struct virtio_driver virtio_balloon_driver = {
index f15f89df1f3653675da3b84b1c7ce47debd1bac6..7ea6fb6a2e5dd78c53a79bfea140e2e297858f7d 100644 (file)
@@ -914,7 +914,7 @@ int gnttab_dma_free_pages(struct gnttab_dma_alloc_args *args)
 
        ret = xenmem_reservation_increase(args->nr_pages, args->frames);
        if (ret != args->nr_pages) {
-               pr_debug("Failed to decrease reservation for DMA buffer\n");
+               pr_debug("Failed to increase reservation for DMA buffer\n");
                ret = -EFAULT;
        } else {
                ret = 0;
index df1ed37c3269ebd8170a21583676b80efe50a47f..de01a6d0059dc4adcb98a24197750f72b0b4ceaf 100644 (file)
 
 MODULE_LICENSE("GPL");
 
-static unsigned int limit = 64;
-module_param(limit, uint, 0644);
-MODULE_PARM_DESC(limit, "Maximum number of pages that may be allocated by "
-                       "the privcmd-buf device per open file");
-
 struct privcmd_buf_private {
        struct mutex lock;
        struct list_head list;
-       unsigned int allocated;
 };
 
 struct privcmd_buf_vma_private {
@@ -60,13 +54,10 @@ static void privcmd_buf_vmapriv_free(struct privcmd_buf_vma_private *vma_priv)
 {
        unsigned int i;
 
-       vma_priv->file_priv->allocated -= vma_priv->n_pages;
-
        list_del(&vma_priv->list);
 
        for (i = 0; i < vma_priv->n_pages; i++)
-               if (vma_priv->pages[i])
-                       __free_page(vma_priv->pages[i]);
+               __free_page(vma_priv->pages[i]);
 
        kfree(vma_priv);
 }
@@ -146,8 +137,7 @@ static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma)
        unsigned int i;
        int ret = 0;
 
-       if (!(vma->vm_flags & VM_SHARED) || count > limit ||
-           file_priv->allocated + count > limit)
+       if (!(vma->vm_flags & VM_SHARED))
                return -EINVAL;
 
        vma_priv = kzalloc(sizeof(*vma_priv) + count * sizeof(void *),
@@ -155,19 +145,15 @@ static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma)
        if (!vma_priv)
                return -ENOMEM;
 
-       vma_priv->n_pages = count;
-       count = 0;
-       for (i = 0; i < vma_priv->n_pages; i++) {
+       for (i = 0; i < count; i++) {
                vma_priv->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
                if (!vma_priv->pages[i])
                        break;
-               count++;
+               vma_priv->n_pages++;
        }
 
        mutex_lock(&file_priv->lock);
 
-       file_priv->allocated += count;
-
        vma_priv->file_priv = file_priv;
        vma_priv->users = 1;
 
index b1092fbefa6309d2535b17b78979b6f3fa9b2b42..2e5d845b5091478252dfb1ed17395cf9c5ce870a 100644 (file)
@@ -137,13 +137,13 @@ static void pvcalls_conn_back_read(void *opaque)
        if (masked_prod < masked_cons) {
                vec[0].iov_base = data->in + masked_prod;
                vec[0].iov_len = wanted;
-               iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 1, wanted);
+               iov_iter_kvec(&msg.msg_iter, WRITE, vec, 1, wanted);
        } else {
                vec[0].iov_base = data->in + masked_prod;
                vec[0].iov_len = array_size - masked_prod;
                vec[1].iov_base = data->in;
                vec[1].iov_len = wanted - vec[0].iov_len;
-               iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 2, wanted);
+               iov_iter_kvec(&msg.msg_iter, WRITE, vec, 2, wanted);
        }
 
        atomic_set(&map->read, 0);
@@ -195,13 +195,13 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
        if (pvcalls_mask(prod, array_size) > pvcalls_mask(cons, array_size)) {
                vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
                vec[0].iov_len = size;
-               iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 1, size);
+               iov_iter_kvec(&msg.msg_iter, READ, vec, 1, size);
        } else {
                vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
                vec[0].iov_len = array_size - pvcalls_mask(cons, array_size);
                vec[1].iov_base = data->out;
                vec[1].iov_len = size - vec[0].iov_len;
-               iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 2, size);
+               iov_iter_kvec(&msg.msg_iter, READ, vec, 2, size);
        }
 
        atomic_set(&map->write, 0);
index e1cbdfdb7c684fd24fdb6f25ee03f4e253e9ef58..0bcbcc20f76954e4e8e3d959628e87f3915b3ce3 100644 (file)
@@ -65,7 +65,7 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
        if (retval == 0)
                return retval;
 
-       iov_iter_bvec(&to, ITER_BVEC | READ, &bvec, 1, PAGE_SIZE);
+       iov_iter_bvec(&to, READ, &bvec, 1, PAGE_SIZE);
 
        retval = p9_client_read(fid, page_offset(page), &to, &err);
        if (err) {
@@ -175,7 +175,7 @@ static int v9fs_vfs_writepage_locked(struct page *page)
        bvec.bv_page = page;
        bvec.bv_offset = 0;
        bvec.bv_len = len;
-       iov_iter_bvec(&from, ITER_BVEC | WRITE, &bvec, 1, len);
+       iov_iter_bvec(&from, WRITE, &bvec, 1, len);
 
        /* We should have writeback_fid always set */
        BUG_ON(!v9inode->writeback_fid);
index cb6c4031af552b010c2e8ff5469b9088c769b3b7..00745147329dc9b6876404011378a8a29ef1791c 100644 (file)
@@ -123,7 +123,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
                if (rdir->tail == rdir->head) {
                        struct iov_iter to;
                        int n;
-                       iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buflen);
+                       iov_iter_kvec(&to, READ, &kvec, 1, buflen);
                        n = p9_client_read(file->private_data, ctx->pos, &to,
                                           &err);
                        if (err)
index 352abc39e891a1468d3576cc199fa13b089f32f7..ac8ff8ca4c115fa0ae52ec4c8fcbe47499780d94 100644 (file)
@@ -32,7 +32,7 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
        struct iov_iter to;
        int err;
 
-       iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buffer_size);
+       iov_iter_kvec(&to, READ, &kvec, 1, buffer_size);
 
        attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
        if (IS_ERR(attr_fid)) {
@@ -107,7 +107,7 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
        struct iov_iter from;
        int retval, err;
 
-       iov_iter_kvec(&from, WRITE | ITER_KVEC, &kvec, 1, value_len);
+       iov_iter_kvec(&from, WRITE, &kvec, 1, value_len);
 
        p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n",
                 name, value_len, flags);
index ebba3b18e5da6dd5ff7d27cd248218937ab19a0f..701aaa9b18994a1e789adad6e7544817c320bc2b 100644 (file)
@@ -27,3 +27,15 @@ config AFS_FSCACHE
        help
          Say Y here if you want AFS data to be cached locally on disk through
          the generic filesystem cache manager
+
+config AFS_DEBUG_CURSOR
+       bool "AFS server cursor debugging"
+       depends on AFS_FS
+       help
+         Say Y here to cause the contents of a server cursor to be dumped to
+         the dmesg log if the server rotation algorithm fails to successfully
+         contact a server.
+
+         See <file:Documentation/filesystems/afs.txt> for more information.
+
+         If unsure, say N.
index 546874057bd3594bd0997d37b8801fd866461077..0738e2bf51936ed1e45ee84a0ed32a61e96c35f6 100644 (file)
@@ -17,6 +17,7 @@ kafs-y := \
        file.o \
        flock.o \
        fsclient.o \
+       fs_probe.o \
        inode.o \
        main.o \
        misc.o \
@@ -29,9 +30,13 @@ kafs-y := \
        super.o \
        netdevices.o \
        vlclient.o \
+       vl_list.o \
+       vl_probe.o \
+       vl_rotate.o \
        volume.o \
        write.o \
-       xattr.o
+       xattr.o \
+       yfsclient.o
 
 kafs-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_AFS_FS)  := kafs.o
index 55a756c60746ca7924625b99b0c2a42d429119fa..967db336d11ae016324f4f15d7cbd33b809045c2 100644 (file)
@@ -64,19 +64,25 @@ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
 /*
  * Parse a text string consisting of delimited addresses.
  */
-struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
-                                          char delim,
-                                          unsigned short service,
-                                          unsigned short port)
+struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net,
+                                              const char *text, size_t len,
+                                              char delim,
+                                              unsigned short service,
+                                              unsigned short port)
 {
+       struct afs_vlserver_list *vllist;
        struct afs_addr_list *alist;
        const char *p, *end = text + len;
+       const char *problem;
        unsigned int nr = 0;
+       int ret = -ENOMEM;
 
        _enter("%*.*s,%c", (int)len, (int)len, text, delim);
 
-       if (!len)
+       if (!len) {
+               _leave(" = -EDESTADDRREQ [empty]");
                return ERR_PTR(-EDESTADDRREQ);
+       }
 
        if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len)))
                delim = ',';
@@ -84,18 +90,24 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
        /* Count the addresses */
        p = text;
        do {
-               if (!*p)
-                       return ERR_PTR(-EINVAL);
+               if (!*p) {
+                       problem = "nul";
+                       goto inval;
+               }
                if (*p == delim)
                        continue;
                nr++;
                if (*p == '[') {
                        p++;
-                       if (p == end)
-                               return ERR_PTR(-EINVAL);
+                       if (p == end) {
+                               problem = "brace1";
+                               goto inval;
+                       }
                        p = memchr(p, ']', end - p);
-                       if (!p)
-                               return ERR_PTR(-EINVAL);
+                       if (!p) {
+                               problem = "brace2";
+                               goto inval;
+                       }
                        p++;
                        if (p >= end)
                                break;
@@ -109,10 +121,19 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
 
        _debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES);
 
-       alist = afs_alloc_addrlist(nr, service, port);
-       if (!alist)
+       vllist = afs_alloc_vlserver_list(1);
+       if (!vllist)
                return ERR_PTR(-ENOMEM);
 
+       vllist->nr_servers = 1;
+       vllist->servers[0].server = afs_alloc_vlserver("<dummy>", 7, AFS_VL_PORT);
+       if (!vllist->servers[0].server)
+               goto error_vl;
+
+       alist = afs_alloc_addrlist(nr, service, AFS_VL_PORT);
+       if (!alist)
+               goto error;
+
        /* Extract the addresses */
        p = text;
        do {
@@ -135,17 +156,21 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
                                        break;
                }
 
-               if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop))
+               if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop)) {
                        family = AF_INET;
-               else if (in6_pton(p, q - p, (u8 *)x, -1, &stop))
+               } else if (in6_pton(p, q - p, (u8 *)x, -1, &stop)) {
                        family = AF_INET6;
-               else
+               } else {
+                       problem = "family";
                        goto bad_address;
+               }
 
-               if (stop != q)
+               p = q;
+               if (stop != p) {
+                       problem = "nostop";
                        goto bad_address;
+               }
 
-               p = q;
                if (q < end && *q == ']')
                        p++;
 
@@ -154,18 +179,23 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
                                /* Port number specification "+1234" */
                                xport = 0;
                                p++;
-                               if (p >= end || !isdigit(*p))
+                               if (p >= end || !isdigit(*p)) {
+                                       problem = "port";
                                        goto bad_address;
+                               }
                                do {
                                        xport *= 10;
                                        xport += *p - '0';
-                                       if (xport > 65535)
+                                       if (xport > 65535) {
+                                               problem = "pval";
                                                goto bad_address;
+                                       }
                                        p++;
                                } while (p < end && isdigit(*p));
                        } else if (*p == delim) {
                                p++;
                        } else {
+                               problem = "weird";
                                goto bad_address;
                        }
                }
@@ -177,12 +207,23 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
 
        } while (p < end);
 
+       rcu_assign_pointer(vllist->servers[0].server->addresses, alist);
        _leave(" = [nr %u]", alist->nr_addrs);
-       return alist;
+       return vllist;
 
-bad_address:
-       kfree(alist);
+inval:
+       _leave(" = -EINVAL [%s %zu %*.*s]",
+              problem, p - text, (int)len, (int)len, text);
        return ERR_PTR(-EINVAL);
+bad_address:
+       _leave(" = -EINVAL [%s %zu %*.*s]",
+              problem, p - text, (int)len, (int)len, text);
+       ret = -EINVAL;
+error:
+       afs_put_addrlist(alist);
+error_vl:
+       afs_put_vlserverlist(net, vllist);
+       return ERR_PTR(ret);
 }
 
 /*
@@ -201,30 +242,34 @@ static int afs_cmp_addr_list(const struct afs_addr_list *a1,
 /*
  * Perform a DNS query for VL servers and build a up an address list.
  */
-struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
+struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
 {
-       struct afs_addr_list *alist;
-       char *vllist = NULL;
+       struct afs_vlserver_list *vllist;
+       char *result = NULL;
        int ret;
 
        _enter("%s", cell->name);
 
-       ret = dns_query("afsdb", cell->name, cell->name_len,
-                       "", &vllist, _expiry);
-       if (ret < 0)
+       ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1",
+                       &result, _expiry);
+       if (ret < 0) {
+               _leave(" = %d [dns]", ret);
                return ERR_PTR(ret);
-
-       alist = afs_parse_text_addrs(vllist, strlen(vllist), ',',
-                                    VL_SERVICE, AFS_VL_PORT);
-       if (IS_ERR(alist)) {
-               kfree(vllist);
-               if (alist != ERR_PTR(-ENOMEM))
-                       pr_err("Failed to parse DNS data\n");
-               return alist;
        }
 
-       kfree(vllist);
-       return alist;
+       if (*_expiry == 0)
+               *_expiry = ktime_get_real_seconds() + 60;
+
+       if (ret > 1 && result[0] == 0)
+               vllist = afs_extract_vlserver_list(cell, result, ret);
+       else
+               vllist = afs_parse_text_addrs(cell->net, result, ret, ',',
+                                             VL_SERVICE, AFS_VL_PORT);
+       kfree(result);
+       if (IS_ERR(vllist) && vllist != ERR_PTR(-ENOMEM))
+               pr_err("Failed to parse DNS data %ld\n", PTR_ERR(vllist));
+
+       return vllist;
 }
 
 /*
@@ -258,6 +303,8 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
                        sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
 
        srx = &alist->addrs[i];
+       srx->srx_family = AF_RXRPC;
+       srx->transport_type = SOCK_DGRAM;
        srx->transport_len = sizeof(srx->transport.sin);
        srx->transport.sin.sin_family = AF_INET;
        srx->transport.sin.sin_port = htons(port);
@@ -296,6 +343,8 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
                        sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
 
        srx = &alist->addrs[i];
+       srx->srx_family = AF_RXRPC;
+       srx->transport_type = SOCK_DGRAM;
        srx->transport_len = sizeof(srx->transport.sin6);
        srx->transport.sin6.sin6_family = AF_INET6;
        srx->transport.sin6.sin6_port = htons(port);
@@ -308,25 +357,33 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
  */
 bool afs_iterate_addresses(struct afs_addr_cursor *ac)
 {
-       _enter("%hu+%hd", ac->start, (short)ac->index);
+       unsigned long set, failed;
+       int index;
 
        if (!ac->alist)
                return false;
 
-       if (ac->begun) {
-               ac->index++;
-               if (ac->index == ac->alist->nr_addrs)
-                       ac->index = 0;
+       set = ac->alist->responded;
+       failed = ac->alist->failed;
+       _enter("%lx-%lx-%lx,%d", set, failed, ac->tried, ac->index);
 
-               if (ac->index == ac->start) {
-                       ac->error = -EDESTADDRREQ;
-                       return false;
-               }
-       }
+       ac->nr_iterations++;
+
+       set &= ~(failed | ac->tried);
+
+       if (!set)
+               return false;
 
-       ac->begun = true;
+       index = READ_ONCE(ac->alist->preferred);
+       if (test_bit(index, &set))
+               goto selected;
+
+       index = __ffs(set);
+
+selected:
+       ac->index = index;
+       set_bit(index, &ac->tried);
        ac->responded = false;
-       ac->addr = &ac->alist->addrs[ac->index];
        return true;
 }
 
@@ -339,53 +396,13 @@ int afs_end_cursor(struct afs_addr_cursor *ac)
 
        alist = ac->alist;
        if (alist) {
-               if (ac->responded && ac->index != ac->start)
-                       WRITE_ONCE(alist->index, ac->index);
+               if (ac->responded &&
+                   ac->index != alist->preferred &&
+                   test_bit(ac->alist->preferred, &ac->tried))
+                       WRITE_ONCE(alist->preferred, ac->index);
                afs_put_addrlist(alist);
+               ac->alist = NULL;
        }
 
-       ac->addr = NULL;
-       ac->alist = NULL;
-       ac->begun = false;
        return ac->error;
 }
-
-/*
- * Set the address cursor for iterating over VL servers.
- */
-int afs_set_vl_cursor(struct afs_addr_cursor *ac, struct afs_cell *cell)
-{
-       struct afs_addr_list *alist;
-       int ret;
-
-       if (!rcu_access_pointer(cell->vl_addrs)) {
-               ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET,
-                                 TASK_INTERRUPTIBLE);
-               if (ret < 0)
-                       return ret;
-
-               if (!rcu_access_pointer(cell->vl_addrs) &&
-                   ktime_get_real_seconds() < cell->dns_expiry)
-                       return cell->error;
-       }
-
-       read_lock(&cell->vl_addrs_lock);
-       alist = rcu_dereference_protected(cell->vl_addrs,
-                                         lockdep_is_held(&cell->vl_addrs_lock));
-       if (alist->nr_addrs > 0)
-               afs_get_addrlist(alist);
-       else
-               alist = NULL;
-       read_unlock(&cell->vl_addrs_lock);
-
-       if (!alist)
-               return -EDESTADDRREQ;
-
-       ac->alist = alist;
-       ac->addr = NULL;
-       ac->start = READ_ONCE(alist->index);
-       ac->index = ac->start;
-       ac->error = 0;
-       ac->begun = false;
-       return 0;
-}
index b4ff1f7ae4ab048a345bdbfae6ea895e31299abb..d12ffb457e4745809460707c02176d2e4a657e4b 100644 (file)
@@ -23,9 +23,9 @@
 #define AFSPATHMAX             1024    /* Maximum length of a pathname plus NUL */
 #define AFSOPAQUEMAX           1024    /* Maximum length of an opaque field */
 
-typedef unsigned                       afs_volid_t;
-typedef unsigned                       afs_vnodeid_t;
-typedef unsigned long long             afs_dataversion_t;
+typedef u64                    afs_volid_t;
+typedef u64                    afs_vnodeid_t;
+typedef u64                    afs_dataversion_t;
 
 typedef enum {
        AFSVL_RWVOL,                    /* read/write volume */
@@ -52,8 +52,9 @@ typedef enum {
  */
 struct afs_fid {
        afs_volid_t     vid;            /* volume ID */
-       afs_vnodeid_t   vnode;          /* file index within volume */
-       unsigned        unique;         /* unique ID number (file index version) */
+       afs_vnodeid_t   vnode;          /* Lower 64-bits of file index within volume */
+       u32             vnode_hi;       /* Upper 32-bits of file index */
+       u32             unique;         /* unique ID number (file index version) */
 };
 
 /*
@@ -67,14 +68,14 @@ typedef enum {
 } afs_callback_type_t;
 
 struct afs_callback {
+       time64_t                expires_at;     /* Time at which expires */
        unsigned                version;        /* Callback version */
-       unsigned                expiry;         /* Time at which expires */
        afs_callback_type_t     type;           /* Type of callback */
 };
 
 struct afs_callback_break {
        struct afs_fid          fid;            /* File identifier */
-       struct afs_callback     cb;             /* Callback details */
+       //struct afs_callback   cb;             /* Callback details */
 };
 
 #define AFSCBMAX 50    /* maximum callbacks transferred per bulk op */
@@ -129,19 +130,18 @@ typedef u32 afs_access_t;
 struct afs_file_status {
        u64                     size;           /* file size */
        afs_dataversion_t       data_version;   /* current data version */
-       time_t                  mtime_client;   /* last time client changed data */
-       time_t                  mtime_server;   /* last time server changed data */
-       unsigned                abort_code;     /* Abort if bulk-fetching this failed */
-
-       afs_file_type_t         type;           /* file type */
-       unsigned                nlink;          /* link count */
-       u32                     author;         /* author ID */
-       u32                     owner;          /* owner ID */
-       u32                     group;          /* group ID */
+       struct timespec64       mtime_client;   /* Last time client changed data */
+       struct timespec64       mtime_server;   /* Last time server changed data */
+       s64                     author;         /* author ID */
+       s64                     owner;          /* owner ID */
+       s64                     group;          /* group ID */
        afs_access_t            caller_access;  /* access rights for authenticated caller */
        afs_access_t            anon_access;    /* access rights for unauthenticated caller */
        umode_t                 mode;           /* UNIX mode */
+       afs_file_type_t         type;           /* file type */
+       u32                     nlink;          /* link count */
        s32                     lock_count;     /* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */
+       u32                     abort_code;     /* Abort if bulk-fetching this failed */
 };
 
 /*
@@ -158,25 +158,27 @@ struct afs_file_status {
  * AFS volume synchronisation information
  */
 struct afs_volsync {
-       time_t                  creation;       /* volume creation time */
+       time64_t                creation;       /* volume creation time */
 };
 
 /*
  * AFS volume status record
  */
 struct afs_volume_status {
-       u32                     vid;            /* volume ID */
-       u32                     parent_id;      /* parent volume ID */
+       afs_volid_t             vid;            /* volume ID */
+       afs_volid_t             parent_id;      /* parent volume ID */
        u8                      online;         /* true if volume currently online and available */
        u8                      in_service;     /* true if volume currently in service */
        u8                      blessed;        /* same as in_service */
        u8                      needs_salvage;  /* true if consistency checking required */
        u32                     type;           /* volume type (afs_voltype_t) */
-       u32                     min_quota;      /* minimum space set aside (blocks) */
-       u32                     max_quota;      /* maximum space this volume may occupy (blocks) */
-       u32                     blocks_in_use;  /* space this volume currently occupies (blocks) */
-       u32                     part_blocks_avail; /* space available in volume's partition */
-       u32                     part_max_blocks; /* size of volume's partition */
+       u64                     min_quota;      /* minimum space set aside (blocks) */
+       u64                     max_quota;      /* maximum space this volume may occupy (blocks) */
+       u64                     blocks_in_use;  /* space this volume currently occupies (blocks) */
+       u64                     part_blocks_avail; /* space available in volume's partition */
+       u64                     part_max_blocks; /* size of volume's partition */
+       s64                     vol_copy_date;
+       s64                     vol_backup_date;
 };
 
 #define AFS_BLOCK_SIZE 1024
index b1c31ec4523a897b0142ba8699ff48ac10f2d801..f6d0a21e8052f066482def18a8740c32032b731d 100644 (file)
@@ -49,7 +49,7 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
        struct afs_vnode *vnode = cookie_netfs_data;
        struct afs_vnode_cache_aux aux;
 
-       _enter("{%x,%x,%llx},%p,%u",
+       _enter("{%llx,%x,%llx},%p,%u",
               vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
               buffer, buflen);
 
index 5f261fbf2182b22a47fc93b7c6fee35f113e0097..1c7955f5cdaf2e776026390f615806f3e6ce535c 100644 (file)
@@ -210,12 +210,10 @@ void afs_init_callback_state(struct afs_server *server)
 /*
  * actually break a callback
  */
-void afs_break_callback(struct afs_vnode *vnode)
+void __afs_break_callback(struct afs_vnode *vnode)
 {
        _enter("");
 
-       write_seqlock(&vnode->cb_lock);
-
        clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
        if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
                vnode->cb_break++;
@@ -230,7 +228,12 @@ void afs_break_callback(struct afs_vnode *vnode)
                        afs_lock_may_be_available(vnode);
                spin_unlock(&vnode->lock);
        }
+}
 
+void afs_break_callback(struct afs_vnode *vnode)
+{
+       write_seqlock(&vnode->cb_lock);
+       __afs_break_callback(vnode);
        write_sequnlock(&vnode->cb_lock);
 }
 
@@ -310,14 +313,10 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
        /* TODO: Sort the callback break list by volume ID */
 
        for (; count > 0; callbacks++, count--) {
-               _debug("- Fid { vl=%08x n=%u u=%u }  CB { v=%u x=%u t=%u }",
+               _debug("- Fid { vl=%08llx n=%llu u=%u }",
                       callbacks->fid.vid,
                       callbacks->fid.vnode,
-                      callbacks->fid.unique,
-                      callbacks->cb.version,
-                      callbacks->cb.expiry,
-                      callbacks->cb.type
-                      );
+                      callbacks->fid.unique);
                afs_break_one_callback(server, &callbacks->fid);
        }
 
index 6127f0fcd62c4e376bd2554c1003aedb40aab471..cf445dbd5f2e05d4c716dadb3123fb397537d4e6 100644 (file)
@@ -20,6 +20,8 @@
 #include "internal.h"
 
 static unsigned __read_mostly afs_cell_gc_delay = 10;
+static unsigned __read_mostly afs_cell_min_ttl = 10 * 60;
+static unsigned __read_mostly afs_cell_max_ttl = 24 * 60 * 60;
 
 static void afs_manage_cell(struct work_struct *);
 
@@ -119,7 +121,7 @@ struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net,
  */
 static struct afs_cell *afs_alloc_cell(struct afs_net *net,
                                       const char *name, unsigned int namelen,
-                                      const char *vllist)
+                                      const char *addresses)
 {
        struct afs_cell *cell;
        int i, ret;
@@ -134,7 +136,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
        if (namelen == 5 && memcmp(name, "@cell", 5) == 0)
                return ERR_PTR(-EINVAL);
 
-       _enter("%*.*s,%s", namelen, namelen, name, vllist);
+       _enter("%*.*s,%s", namelen, namelen, name, addresses);
 
        cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL);
        if (!cell) {
@@ -153,23 +155,26 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
                       (1 << AFS_CELL_FL_NO_LOOKUP_YET));
        INIT_LIST_HEAD(&cell->proc_volumes);
        rwlock_init(&cell->proc_lock);
-       rwlock_init(&cell->vl_addrs_lock);
+       rwlock_init(&cell->vl_servers_lock);
 
        /* Fill in the VL server list if we were given a list of addresses to
         * use.
         */
-       if (vllist) {
-               struct afs_addr_list *alist;
-
-               alist = afs_parse_text_addrs(vllist, strlen(vllist), ':',
-                                            VL_SERVICE, AFS_VL_PORT);
-               if (IS_ERR(alist)) {
-                       ret = PTR_ERR(alist);
+       if (addresses) {
+               struct afs_vlserver_list *vllist;
+
+               vllist = afs_parse_text_addrs(net,
+                                             addresses, strlen(addresses), ':',
+                                             VL_SERVICE, AFS_VL_PORT);
+               if (IS_ERR(vllist)) {
+                       ret = PTR_ERR(vllist);
                        goto parse_failed;
                }
 
-               rcu_assign_pointer(cell->vl_addrs, alist);
+               rcu_assign_pointer(cell->vl_servers, vllist);
                cell->dns_expiry = TIME64_MAX;
+       } else {
+               cell->dns_expiry = ktime_get_real_seconds();
        }
 
        _leave(" = %p", cell);
@@ -356,26 +361,40 @@ int afs_cell_init(struct afs_net *net, const char *rootcell)
  */
 static void afs_update_cell(struct afs_cell *cell)
 {
-       struct afs_addr_list *alist, *old;
-       time64_t now, expiry;
+       struct afs_vlserver_list *vllist, *old;
+       unsigned int min_ttl = READ_ONCE(afs_cell_min_ttl);
+       unsigned int max_ttl = READ_ONCE(afs_cell_max_ttl);
+       time64_t now, expiry = 0;
 
        _enter("%s", cell->name);
 
-       alist = afs_dns_query(cell, &expiry);
-       if (IS_ERR(alist)) {
-               switch (PTR_ERR(alist)) {
+       vllist = afs_dns_query(cell, &expiry);
+
+       now = ktime_get_real_seconds();
+       if (min_ttl > max_ttl)
+               max_ttl = min_ttl;
+       if (expiry < now + min_ttl)
+               expiry = now + min_ttl;
+       else if (expiry > now + max_ttl)
+               expiry = now + max_ttl;
+
+       if (IS_ERR(vllist)) {
+               switch (PTR_ERR(vllist)) {
                case -ENODATA:
-                       /* The DNS said that the cell does not exist */
+               case -EDESTADDRREQ:
+                       /* The DNS said that the cell does not exist or there
+                        * weren't any addresses to be had.
+                        */
                        set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
                        clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
-                       cell->dns_expiry = ktime_get_real_seconds() + 61;
+                       cell->dns_expiry = expiry;
                        break;
 
                case -EAGAIN:
                case -ECONNREFUSED:
                default:
                        set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
-                       cell->dns_expiry = ktime_get_real_seconds() + 10;
+                       cell->dns_expiry = now + 10;
                        break;
                }
 
@@ -387,12 +406,12 @@ static void afs_update_cell(struct afs_cell *cell)
                /* Exclusion on changing vl_addrs is achieved by a
                 * non-reentrant work item.
                 */
-               old = rcu_dereference_protected(cell->vl_addrs, true);
-               rcu_assign_pointer(cell->vl_addrs, alist);
+               old = rcu_dereference_protected(cell->vl_servers, true);
+               rcu_assign_pointer(cell->vl_servers, vllist);
                cell->dns_expiry = expiry;
 
                if (old)
-                       afs_put_addrlist(old);
+                       afs_put_vlserverlist(cell->net, old);
        }
 
        if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags))
@@ -414,7 +433,7 @@ static void afs_cell_destroy(struct rcu_head *rcu)
 
        ASSERTCMP(atomic_read(&cell->usage), ==, 0);
 
-       afs_put_addrlist(rcu_access_pointer(cell->vl_addrs));
+       afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers));
        key_put(cell->anonymous_key);
        kfree(cell);
 
index 9e51d6fe7e8f975f34f877217a28a8e99bcfa5e4..8ee5972893ed5a75583bfb2821a42636403ee086 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/ip.h>
 #include "internal.h"
 #include "afs_cm.h"
+#include "protocol_yfs.h"
 
 static int afs_deliver_cb_init_call_back_state(struct afs_call *);
 static int afs_deliver_cb_init_call_back_state3(struct afs_call *);
@@ -30,6 +31,8 @@ static void SRXAFSCB_Probe(struct work_struct *);
 static void SRXAFSCB_ProbeUuid(struct work_struct *);
 static void SRXAFSCB_TellMeAboutYourself(struct work_struct *);
 
+static int afs_deliver_yfs_cb_callback(struct afs_call *);
+
 #define CM_NAME(name) \
        const char afs_SRXCB##name##_name[] __tracepoint_string =       \
                "CB." #name
@@ -100,13 +103,26 @@ static const struct afs_call_type afs_SRXCBTellMeAboutYourself = {
        .work           = SRXAFSCB_TellMeAboutYourself,
 };
 
+/*
+ * YFS CB.CallBack operation type
+ */
+static CM_NAME(YFS_CallBack);
+static const struct afs_call_type afs_SRXYFSCB_CallBack = {
+       .name           = afs_SRXCBYFS_CallBack_name,
+       .deliver        = afs_deliver_yfs_cb_callback,
+       .destructor     = afs_cm_destructor,
+       .work           = SRXAFSCB_CallBack,
+};
+
 /*
  * route an incoming cache manager call
  * - return T if supported, F if not
  */
 bool afs_cm_incoming_call(struct afs_call *call)
 {
-       _enter("{CB.OP %u}", call->operation_ID);
+       _enter("{%u, CB.OP %u}", call->service_id, call->operation_ID);
+
+       call->epoch = rxrpc_kernel_get_epoch(call->net->socket, call->rxcall);
 
        switch (call->operation_ID) {
        case CBCallBack:
@@ -127,11 +143,101 @@ bool afs_cm_incoming_call(struct afs_call *call)
        case CBTellMeAboutYourself:
                call->type = &afs_SRXCBTellMeAboutYourself;
                return true;
+       case YFSCBCallBack:
+               if (call->service_id != YFS_CM_SERVICE)
+                       return false;
+               call->type = &afs_SRXYFSCB_CallBack;
+               return true;
        default:
                return false;
        }
 }
 
+/*
+ * Record a probe to the cache manager from a server.
+ */
+static int afs_record_cm_probe(struct afs_call *call, struct afs_server *server)
+{
+       _enter("");
+
+       if (test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags) &&
+           !test_bit(AFS_SERVER_FL_PROBING, &server->flags)) {
+               if (server->cm_epoch == call->epoch)
+                       return 0;
+
+               if (!server->probe.said_rebooted) {
+                       pr_notice("kAFS: FS rebooted %pU\n", &server->uuid);
+                       server->probe.said_rebooted = true;
+               }
+       }
+
+       spin_lock(&server->probe_lock);
+
+       if (!test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags)) {
+               server->cm_epoch = call->epoch;
+               server->probe.cm_epoch = call->epoch;
+               goto out;
+       }
+
+       if (server->probe.cm_probed &&
+           call->epoch != server->probe.cm_epoch &&
+           !server->probe.said_inconsistent) {
+               pr_notice("kAFS: FS endpoints inconsistent %pU\n",
+                         &server->uuid);
+               server->probe.said_inconsistent = true;
+       }
+
+       if (!server->probe.cm_probed || call->epoch == server->cm_epoch)
+               server->probe.cm_epoch = server->cm_epoch;
+
+out:
+       server->probe.cm_probed = true;
+       spin_unlock(&server->probe_lock);
+       return 0;
+}
+
+/*
+ * Find the server record by peer address and record a probe to the cache
+ * manager from a server.
+ */
+static int afs_find_cm_server_by_peer(struct afs_call *call)
+{
+       struct sockaddr_rxrpc srx;
+       struct afs_server *server;
+
+       rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
+
+       server = afs_find_server(call->net, &srx);
+       if (!server) {
+               trace_afs_cm_no_server(call, &srx);
+               return 0;
+       }
+
+       call->cm_server = server;
+       return afs_record_cm_probe(call, server);
+}
+
+/*
+ * Find the server record by server UUID and record a probe to the cache
+ * manager from a server.
+ */
+static int afs_find_cm_server_by_uuid(struct afs_call *call,
+                                     struct afs_uuid *uuid)
+{
+       struct afs_server *server;
+
+       rcu_read_lock();
+       server = afs_find_server_by_uuid(call->net, call->request);
+       rcu_read_unlock();
+       if (!server) {
+               trace_afs_cm_no_server_u(call, call->request);
+               return 0;
+       }
+
+       call->cm_server = server;
+       return afs_record_cm_probe(call, server);
+}
+
 /*
  * Clean up a cache manager call.
  */
@@ -168,7 +274,6 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
 static int afs_deliver_cb_callback(struct afs_call *call)
 {
        struct afs_callback_break *cb;
-       struct sockaddr_rxrpc srx;
        __be32 *bp;
        int ret, loop;
 
@@ -176,32 +281,32 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
 
                /* extract the FID array and its count in two steps */
        case 1:
                _debug("extract FID count");
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                call->count = ntohl(call->tmp);
                _debug("FID count: %u", call->count);
                if (call->count > AFSCBMAX)
-                       return afs_protocol_error(call, -EBADMSG);
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_cb_fid_count);
 
                call->buffer = kmalloc(array3_size(call->count, 3, 4),
                                       GFP_KERNEL);
                if (!call->buffer)
                        return -ENOMEM;
-               call->offset = 0;
+               afs_extract_to_buf(call, call->count * 3 * 4);
                call->unmarshall++;
 
        case 2:
                _debug("extract FID array");
-               ret = afs_extract_data(call, call->buffer,
-                                      call->count * 3 * 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -218,59 +323,46 @@ static int afs_deliver_cb_callback(struct afs_call *call)
                        cb->fid.vid     = ntohl(*bp++);
                        cb->fid.vnode   = ntohl(*bp++);
                        cb->fid.unique  = ntohl(*bp++);
-                       cb->cb.type     = AFSCM_CB_UNTYPED;
                }
 
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
 
                /* extract the callback array and its count in two steps */
        case 3:
                _debug("extract CB count");
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                call->count2 = ntohl(call->tmp);
                _debug("CB count: %u", call->count2);
                if (call->count2 != call->count && call->count2 != 0)
-                       return afs_protocol_error(call, -EBADMSG);
-               call->offset = 0;
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_cb_count);
+               call->_iter = &call->iter;
+               iov_iter_discard(&call->iter, READ, call->count2 * 3 * 4);
                call->unmarshall++;
 
        case 4:
-               _debug("extract CB array");
-               ret = afs_extract_data(call, call->buffer,
-                                      call->count2 * 3 * 4, false);
+               _debug("extract discard %zu/%u",
+                      iov_iter_count(&call->iter), call->count2 * 3 * 4);
+
+               ret = afs_extract_data(call, false);
                if (ret < 0)
                        return ret;
 
-               _debug("unmarshall CB array");
-               cb = call->request;
-               bp = call->buffer;
-               for (loop = call->count2; loop > 0; loop--, cb++) {
-                       cb->cb.version  = ntohl(*bp++);
-                       cb->cb.expiry   = ntohl(*bp++);
-                       cb->cb.type     = ntohl(*bp++);
-               }
-
-               call->offset = 0;
                call->unmarshall++;
        case 5:
                break;
        }
 
        if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
-               return -EIO;
+               return afs_io_error(call, afs_io_error_cm_reply);
 
        /* we'll need the file server record as that tells us which set of
         * vnodes to operate upon */
-       rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
-       call->cm_server = afs_find_server(call->net, &srx);
-       if (!call->cm_server)
-               trace_afs_cm_no_server(call, &srx);
-
-       return afs_queue_call_work(call);
+       return afs_find_cm_server_by_peer(call);
 }
 
 /*
@@ -294,24 +386,18 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
  */
 static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
 {
-       struct sockaddr_rxrpc srx;
        int ret;
 
        _enter("");
 
-       rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
-
-       ret = afs_extract_data(call, NULL, 0, false);
+       afs_extract_discard(call, 0);
+       ret = afs_extract_data(call, false);
        if (ret < 0)
                return ret;
 
        /* we'll need the file server record as that tells us which set of
         * vnodes to operate upon */
-       call->cm_server = afs_find_server(call->net, &srx);
-       if (!call->cm_server)
-               trace_afs_cm_no_server(call, &srx);
-
-       return afs_queue_call_work(call);
+       return afs_find_cm_server_by_peer(call);
 }
 
 /*
@@ -330,16 +416,15 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
                call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL);
                if (!call->buffer)
                        return -ENOMEM;
+               afs_extract_to_buf(call, 11 * sizeof(__be32));
                call->unmarshall++;
 
        case 1:
                _debug("extract UUID");
-               ret = afs_extract_data(call, call->buffer,
-                                      11 * sizeof(__be32), false);
+               ret = afs_extract_data(call, false);
                switch (ret) {
                case 0:         break;
                case -EAGAIN:   return 0;
@@ -362,7 +447,6 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
                for (loop = 0; loop < 6; loop++)
                        r->node[loop] = ntohl(b[loop + 5]);
 
-               call->offset = 0;
                call->unmarshall++;
 
        case 2:
@@ -370,17 +454,11 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
        }
 
        if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
-               return -EIO;
+               return afs_io_error(call, afs_io_error_cm_reply);
 
        /* we'll need the file server record as that tells us which set of
         * vnodes to operate upon */
-       rcu_read_lock();
-       call->cm_server = afs_find_server_by_uuid(call->net, call->request);
-       rcu_read_unlock();
-       if (!call->cm_server)
-               trace_afs_cm_no_server_u(call, call->request);
-
-       return afs_queue_call_work(call);
+       return afs_find_cm_server_by_uuid(call, call->request);
 }
 
 /*
@@ -405,14 +483,14 @@ static int afs_deliver_cb_probe(struct afs_call *call)
 
        _enter("");
 
-       ret = afs_extract_data(call, NULL, 0, false);
+       afs_extract_discard(call, 0);
+       ret = afs_extract_data(call, false);
        if (ret < 0)
                return ret;
 
        if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
-               return -EIO;
-
-       return afs_queue_call_work(call);
+               return afs_io_error(call, afs_io_error_cm_reply);
+       return afs_find_cm_server_by_peer(call);
 }
 
 /*
@@ -453,16 +531,15 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
                call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL);
                if (!call->buffer)
                        return -ENOMEM;
+               afs_extract_to_buf(call, 11 * sizeof(__be32));
                call->unmarshall++;
 
        case 1:
                _debug("extract UUID");
-               ret = afs_extract_data(call, call->buffer,
-                                      11 * sizeof(__be32), false);
+               ret = afs_extract_data(call, false);
                switch (ret) {
                case 0:         break;
                case -EAGAIN:   return 0;
@@ -485,7 +562,6 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
                for (loop = 0; loop < 6; loop++)
                        r->node[loop] = ntohl(b[loop + 5]);
 
-               call->offset = 0;
                call->unmarshall++;
 
        case 2:
@@ -493,9 +569,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
        }
 
        if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
-               return -EIO;
-
-       return afs_queue_call_work(call);
+               return afs_io_error(call, afs_io_error_cm_reply);
+       return afs_find_cm_server_by_uuid(call, call->request);
 }
 
 /*
@@ -570,12 +645,88 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
 
        _enter("");
 
-       ret = afs_extract_data(call, NULL, 0, false);
+       afs_extract_discard(call, 0);
+       ret = afs_extract_data(call, false);
        if (ret < 0)
                return ret;
 
        if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
-               return -EIO;
+               return afs_io_error(call, afs_io_error_cm_reply);
+       return afs_find_cm_server_by_peer(call);
+}
+
+/*
+ * deliver request data to a YFS CB.CallBack call
+ */
+static int afs_deliver_yfs_cb_callback(struct afs_call *call)
+{
+       struct afs_callback_break *cb;
+       struct yfs_xdr_YFSFid *bp;
+       size_t size;
+       int ret, loop;
+
+       _enter("{%u}", call->unmarshall);
+
+       switch (call->unmarshall) {
+       case 0:
+               afs_extract_to_tmp(call);
+               call->unmarshall++;
+
+               /* extract the FID array and its count in two steps */
+       case 1:
+               _debug("extract FID count");
+               ret = afs_extract_data(call, true);
+               if (ret < 0)
+                       return ret;
+
+               call->count = ntohl(call->tmp);
+               _debug("FID count: %u", call->count);
+               if (call->count > YFSCBMAX)
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_cb_fid_count);
+
+               size = array_size(call->count, sizeof(struct yfs_xdr_YFSFid));
+               call->buffer = kmalloc(size, GFP_KERNEL);
+               if (!call->buffer)
+                       return -ENOMEM;
+               afs_extract_to_buf(call, size);
+               call->unmarshall++;
+
+       case 2:
+               _debug("extract FID array");
+               ret = afs_extract_data(call, false);
+               if (ret < 0)
+                       return ret;
+
+               _debug("unmarshall FID array");
+               call->request = kcalloc(call->count,
+                                       sizeof(struct afs_callback_break),
+                                       GFP_KERNEL);
+               if (!call->request)
+                       return -ENOMEM;
+
+               cb = call->request;
+               bp = call->buffer;
+               for (loop = call->count; loop > 0; loop--, cb++) {
+                       cb->fid.vid     = xdr_to_u64(bp->volume);
+                       cb->fid.vnode   = xdr_to_u64(bp->vnode.lo);
+                       cb->fid.vnode_hi = ntohl(bp->vnode.hi);
+                       cb->fid.unique  = ntohl(bp->vnode.unique);
+                       bp++;
+               }
+
+               afs_extract_to_tmp(call);
+               call->unmarshall++;
+
+       case 3:
+               break;
+       }
+
+       if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+               return afs_io_error(call, afs_io_error_cm_reply);
 
-       return afs_queue_call_work(call);
+       /* We'll need the file server record as that tells us which set of
+        * vnodes to operate upon.
+        */
+       return afs_find_cm_server_by_peer(call);
 }
index 855bf2b79fed4117559f6f011cacd3b43f74b927..43dea3b00c29b9dc93b6416e8e869b4f8d726c28 100644 (file)
@@ -138,6 +138,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
                               ntohs(dbuf->blocks[tmp].hdr.magic));
                        trace_afs_dir_check_failed(dvnode, off, i_size);
                        kunmap(page);
+                       trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic);
                        goto error;
                }
 
@@ -190,9 +191,11 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 retry:
        i_size = i_size_read(&dvnode->vfs_inode);
        if (i_size < 2048)
-               return ERR_PTR(-EIO);
-       if (i_size > 2048 * 1024)
+               return ERR_PTR(afs_bad(dvnode, afs_file_error_dir_small));
+       if (i_size > 2048 * 1024) {
+               trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
                return ERR_PTR(-EFBIG);
+       }
 
        _enter("%llu", i_size);
 
@@ -315,7 +318,8 @@ content_has_grown:
 /*
  * deal with one block in an AFS directory
  */
-static int afs_dir_iterate_block(struct dir_context *ctx,
+static int afs_dir_iterate_block(struct afs_vnode *dvnode,
+                                struct dir_context *ctx,
                                 union afs_xdr_dir_block *block,
                                 unsigned blkoff)
 {
@@ -365,7 +369,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
                                       " (len %u/%zu)",
                                       blkoff / sizeof(union afs_xdr_dir_block),
                                       offset, next, tmp, nlen);
-                               return -EIO;
+                               return afs_bad(dvnode, afs_file_error_dir_over_end);
                        }
                        if (!(block->hdr.bitmap[next / 8] &
                              (1 << (next % 8)))) {
@@ -373,7 +377,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
                                       " %u unmarked extension (len %u/%zu)",
                                       blkoff / sizeof(union afs_xdr_dir_block),
                                       offset, next, tmp, nlen);
-                               return -EIO;
+                               return afs_bad(dvnode, afs_file_error_dir_unmarked_ext);
                        }
 
                        _debug("ENT[%zu.%u]: ext %u/%zu",
@@ -442,7 +446,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
                 */
                page = req->pages[blkoff / PAGE_SIZE];
                if (!page) {
-                       ret = -EIO;
+                       ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
                        break;
                }
                mark_page_accessed(page);
@@ -455,7 +459,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
                do {
                        dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) /
                                               sizeof(union afs_xdr_dir_block)];
-                       ret = afs_dir_iterate_block(ctx, dblock, blkoff);
+                       ret = afs_dir_iterate_block(dvnode, ctx, dblock, blkoff);
                        if (ret != 1) {
                                kunmap(page);
                                goto out;
@@ -548,7 +552,7 @@ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
        }
 
        *fid = cookie.fid;
-       _leave(" = 0 { vn=%u u=%u }", fid->vnode, fid->unique);
+       _leave(" = 0 { vn=%llu u=%u }", fid->vnode, fid->unique);
        return 0;
 }
 
@@ -826,7 +830,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
        struct key *key;
        int ret;
 
-       _enter("{%x:%u},%p{%pd},",
+       _enter("{%llx:%llu},%p{%pd},",
               dvnode->fid.vid, dvnode->fid.vnode, dentry, dentry);
 
        ASSERTCMP(d_inode(dentry), ==, NULL);
@@ -896,7 +900,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 
        if (d_really_is_positive(dentry)) {
                vnode = AFS_FS_I(d_inode(dentry));
-               _enter("{v={%x:%u} n=%pd fl=%lx},",
+               _enter("{v={%llx:%llu} n=%pd fl=%lx},",
                       vnode->fid.vid, vnode->fid.vnode, dentry,
                       vnode->flags);
        } else {
@@ -965,7 +969,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
                /* if the vnode ID has changed, then the dirent points to a
                 * different file */
                if (fid.vnode != vnode->fid.vnode) {
-                       _debug("%pd: dirent changed [%u != %u]",
+                       _debug("%pd: dirent changed [%llu != %llu]",
                               dentry, fid.vnode,
                               vnode->fid.vnode);
                        goto not_found;
@@ -1085,6 +1089,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
 
        vnode = AFS_FS_I(inode);
        set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
+       afs_vnode_commit_status(fc, vnode, 0);
        d_add(new_dentry, inode);
 }
 
@@ -1104,7 +1109,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 
        mode |= S_IFDIR;
 
-       _enter("{%x:%u},{%pd},%ho",
+       _enter("{%llx:%llu},{%pd},%ho",
               dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
 
        key = afs_request_key(dvnode->volume->cell);
@@ -1169,12 +1174,12 @@ static void afs_dir_remove_subdir(struct dentry *dentry)
 static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 {
        struct afs_fs_cursor fc;
-       struct afs_vnode *dvnode = AFS_FS_I(dir);
+       struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
        struct key *key;
        u64 data_version = dvnode->status.data_version;
        int ret;
 
-       _enter("{%x:%u},{%pd}",
+       _enter("{%llx:%llu},{%pd}",
               dvnode->fid.vid, dvnode->fid.vnode, dentry);
 
        key = afs_request_key(dvnode->volume->cell);
@@ -1183,11 +1188,19 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
                goto error;
        }
 
+       /* Try to make sure we have a callback promise on the victim. */
+       if (d_really_is_positive(dentry)) {
+               vnode = AFS_FS_I(d_inode(dentry));
+               ret = afs_validate(vnode, key);
+               if (ret < 0)
+                       goto error_key;
+       }
+
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, dvnode, key)) {
                while (afs_select_fileserver(&fc)) {
                        fc.cb_break = afs_calc_vnode_cb_break(dvnode);
-                       afs_fs_remove(&fc, dentry->d_name.name, true,
+                       afs_fs_remove(&fc, vnode, dentry->d_name.name, true,
                                      data_version);
                }
 
@@ -1201,6 +1214,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
                }
        }
 
+error_key:
        key_put(key);
 error:
        return ret;
@@ -1231,7 +1245,9 @@ static int afs_dir_remove_link(struct dentry *dentry, struct key *key,
        if (d_really_is_positive(dentry)) {
                struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
 
-               if (dir_valid) {
+               if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+                       /* Already done */
+               } else if (dir_valid) {
                        drop_nlink(&vnode->vfs_inode);
                        if (vnode->vfs_inode.i_nlink == 0) {
                                set_bit(AFS_VNODE_DELETED, &vnode->flags);
@@ -1260,13 +1276,13 @@ static int afs_dir_remove_link(struct dentry *dentry, struct key *key,
 static int afs_unlink(struct inode *dir, struct dentry *dentry)
 {
        struct afs_fs_cursor fc;
-       struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
+       struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
        struct key *key;
        unsigned long d_version = (unsigned long)dentry->d_fsdata;
        u64 data_version = dvnode->status.data_version;
        int ret;
 
-       _enter("{%x:%u},{%pd}",
+       _enter("{%llx:%llu},{%pd}",
               dvnode->fid.vid, dvnode->fid.vnode, dentry);
 
        if (dentry->d_name.len >= AFSNAMEMAX)
@@ -1290,7 +1306,18 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
        if (afs_begin_vnode_operation(&fc, dvnode, key)) {
                while (afs_select_fileserver(&fc)) {
                        fc.cb_break = afs_calc_vnode_cb_break(dvnode);
-                       afs_fs_remove(&fc, dentry->d_name.name, false,
+
+                       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc.cbi->server->flags) &&
+                           !test_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags)) {
+                               yfs_fs_remove_file2(&fc, vnode, dentry->d_name.name,
+                                                   data_version);
+                               if (fc.ac.error != -ECONNABORTED ||
+                                   fc.ac.abort_code != RXGEN_OPCODE)
+                                       continue;
+                               set_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags);
+                       }
+
+                       afs_fs_remove(&fc, vnode, dentry->d_name.name, false,
                                      data_version);
                }
 
@@ -1330,7 +1357,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 
        mode |= S_IFREG;
 
-       _enter("{%x:%u},{%pd},%ho,",
+       _enter("{%llx:%llu},{%pd},%ho,",
               dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
 
        ret = -ENAMETOOLONG;
@@ -1393,7 +1420,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
        dvnode = AFS_FS_I(dir);
        data_version = dvnode->status.data_version;
 
-       _enter("{%x:%u},{%x:%u},{%pd}",
+       _enter("{%llx:%llu},{%llx:%llu},{%pd}",
               vnode->fid.vid, vnode->fid.vnode,
               dvnode->fid.vid, dvnode->fid.vnode,
               dentry);
@@ -1464,7 +1491,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
        u64 data_version = dvnode->status.data_version;
        int ret;
 
-       _enter("{%x:%u},{%pd},%s",
+       _enter("{%llx:%llu},{%pd},%s",
               dvnode->fid.vid, dvnode->fid.vnode, dentry,
               content);
 
@@ -1540,7 +1567,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
        orig_data_version = orig_dvnode->status.data_version;
        new_data_version = new_dvnode->status.data_version;
 
-       _enter("{%x:%u},{%x:%u},{%x:%u},{%pd}",
+       _enter("{%llx:%llu},{%llx:%llu},{%llx:%llu},{%pd}",
               orig_dvnode->fid.vid, orig_dvnode->fid.vnode,
               vnode->fid.vid, vnode->fid.vnode,
               new_dvnode->fid.vid, new_dvnode->fid.vnode,
@@ -1607,7 +1634,7 @@ static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags)
 {
        struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host);
 
-       _enter("{{%x:%u}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index);
+       _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index);
 
        set_page_private(page, 0);
        ClearPagePrivate(page);
index f29c6dade7f6250348b886b44b8be150199f78f7..a9ba81ddf1546272d4a5cbb7e0885326c250c6ff 100644 (file)
@@ -46,7 +46,7 @@ static int afs_probe_cell_name(struct dentry *dentry)
                return 0;
        }
 
-       ret = dns_query("afsdb", name, len, "", NULL, NULL);
+       ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL);
        if (ret == -ENODATA)
                ret = -EDESTADDRREQ;
        return ret;
@@ -62,7 +62,7 @@ struct inode *afs_try_auto_mntpt(struct dentry *dentry, struct inode *dir)
        struct inode *inode;
        int ret = -ENOENT;
 
-       _enter("%p{%pd}, {%x:%u}",
+       _enter("%p{%pd}, {%llx:%llu}",
               dentry, dentry, vnode->fid.vid, vnode->fid.vnode);
 
        if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags))
index 7d4f26198573d7f6a4dffb7ff4a82ee0f8fbb573..d6bc3f5d784b5676185070ae208345dd2eef3325 100644 (file)
@@ -121,7 +121,7 @@ int afs_open(struct inode *inode, struct file *file)
        struct key *key;
        int ret;
 
-       _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
+       _enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode);
 
        key = afs_request_key(vnode->volume->cell);
        if (IS_ERR(key)) {
@@ -170,7 +170,7 @@ int afs_release(struct inode *inode, struct file *file)
        struct afs_vnode *vnode = AFS_FS_I(inode);
        struct afs_file *af = file->private_data;
 
-       _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
+       _enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode);
 
        if ((file->f_mode & FMODE_WRITE))
                return vfs_fsync(file, 0);
@@ -228,7 +228,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de
        struct afs_fs_cursor fc;
        int ret;
 
-       _enter("%s{%x:%u.%u},%x,,,",
+       _enter("%s{%llx:%llu.%u},%x,,,",
               vnode->volume->name,
               vnode->fid.vid,
               vnode->fid.vnode,
@@ -634,7 +634,7 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
        struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
        unsigned long priv;
 
-       _enter("{{%x:%u}[%lu],%lx},%x",
+       _enter("{{%llx:%llu}[%lu],%lx},%x",
               vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
               gfp_flags);
 
index dc62d15a964b8809d7028d33a393c41b6963242b..0568fd98682109e0dd686d5097bf9fe2e40ecfe1 100644 (file)
@@ -29,7 +29,7 @@ static const struct file_lock_operations afs_lock_ops = {
  */
 void afs_lock_may_be_available(struct afs_vnode *vnode)
 {
-       _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+       _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
 
        queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0);
 }
@@ -76,7 +76,7 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key,
        struct afs_fs_cursor fc;
        int ret;
 
-       _enter("%s{%x:%u.%u},%x,%u",
+       _enter("%s{%llx:%llu.%u},%x,%u",
               vnode->volume->name,
               vnode->fid.vid,
               vnode->fid.vnode,
@@ -107,7 +107,7 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key)
        struct afs_fs_cursor fc;
        int ret;
 
-       _enter("%s{%x:%u.%u},%x",
+       _enter("%s{%llx:%llu.%u},%x",
               vnode->volume->name,
               vnode->fid.vid,
               vnode->fid.vnode,
@@ -138,7 +138,7 @@ static int afs_release_lock(struct afs_vnode *vnode, struct key *key)
        struct afs_fs_cursor fc;
        int ret;
 
-       _enter("%s{%x:%u.%u},%x",
+       _enter("%s{%llx:%llu.%u},%x",
               vnode->volume->name,
               vnode->fid.vid,
               vnode->fid.vnode,
@@ -175,7 +175,7 @@ void afs_lock_work(struct work_struct *work)
        struct key *key;
        int ret;
 
-       _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+       _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
 
        spin_lock(&vnode->lock);
 
@@ -192,7 +192,7 @@ again:
                ret = afs_release_lock(vnode, vnode->lock_key);
                if (ret < 0)
                        printk(KERN_WARNING "AFS:"
-                              " Failed to release lock on {%x:%x} error %d\n",
+                              " Failed to release lock on {%llx:%llx} error %d\n",
                               vnode->fid.vid, vnode->fid.vnode, ret);
 
                spin_lock(&vnode->lock);
@@ -229,7 +229,7 @@ again:
                key_put(key);
 
                if (ret < 0)
-                       pr_warning("AFS: Failed to extend lock on {%x:%x} error %d\n",
+                       pr_warning("AFS: Failed to extend lock on {%llx:%llx} error %d\n",
                                   vnode->fid.vid, vnode->fid.vnode, ret);
 
                spin_lock(&vnode->lock);
@@ -430,7 +430,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
        struct key *key = afs_file_key(file);
        int ret;
 
-       _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
+       _enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
 
        /* only whole-file locks are supported */
        if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
@@ -582,7 +582,7 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl)
        struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
        int ret;
 
-       _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
+       _enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
 
        /* Flush all pending writes before doing anything with locks. */
        vfs_fsync(file, 0);
@@ -639,7 +639,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
 {
        struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
 
-       _enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
+       _enter("{%llx:%llu},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
               vnode->fid.vid, vnode->fid.vnode, cmd,
               fl->fl_type, fl->fl_flags,
               (long long) fl->fl_start, (long long) fl->fl_end);
@@ -662,7 +662,7 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl)
 {
        struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
 
-       _enter("{%x:%u},%d,{t=%x,fl=%x}",
+       _enter("{%llx:%llu},%d,{t=%x,fl=%x}",
               vnode->fid.vid, vnode->fid.vnode, cmd,
               fl->fl_type, fl->fl_flags);
 
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
new file mode 100644 (file)
index 0000000..d049cb4
--- /dev/null
@@ -0,0 +1,270 @@
+/* AFS fileserver probing
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include "afs_fs.h"
+#include "internal.h"
+#include "protocol_yfs.h"
+
+static bool afs_fs_probe_done(struct afs_server *server)
+{
+       if (!atomic_dec_and_test(&server->probe_outstanding))
+               return false;
+
+       wake_up_var(&server->probe_outstanding);
+       clear_bit_unlock(AFS_SERVER_FL_PROBING, &server->flags);
+       wake_up_bit(&server->flags, AFS_SERVER_FL_PROBING);
+       return true;
+}
+
+/*
+ * Process the result of probing a fileserver.  This is called after successful
+ * or failed delivery of an FS.GetCapabilities operation.
+ */
+void afs_fileserver_probe_result(struct afs_call *call)
+{
+       struct afs_addr_list *alist = call->alist;
+       struct afs_server *server = call->reply[0];
+       unsigned int server_index = (long)call->reply[1];
+       unsigned int index = call->addr_ix;
+       unsigned int rtt = UINT_MAX;
+       bool have_result = false;
+       u64 _rtt;
+       int ret = call->error;
+
+       _enter("%pU,%u", &server->uuid, index);
+
+       spin_lock(&server->probe_lock);
+
+       switch (ret) {
+       case 0:
+               server->probe.error = 0;
+               goto responded;
+       case -ECONNABORTED:
+               if (!server->probe.responded) {
+                       server->probe.abort_code = call->abort_code;
+                       server->probe.error = ret;
+               }
+               goto responded;
+       case -ENOMEM:
+       case -ENONET:
+               server->probe.local_failure = true;
+               afs_io_error(call, afs_io_error_fs_probe_fail);
+               goto out;
+       case -ECONNRESET: /* Responded, but call expired. */
+       case -ENETUNREACH:
+       case -EHOSTUNREACH:
+       case -ECONNREFUSED:
+       case -ETIMEDOUT:
+       case -ETIME:
+       default:
+               clear_bit(index, &alist->responded);
+               set_bit(index, &alist->failed);
+               if (!server->probe.responded &&
+                   (server->probe.error == 0 ||
+                    server->probe.error == -ETIMEDOUT ||
+                    server->probe.error == -ETIME))
+                       server->probe.error = ret;
+               afs_io_error(call, afs_io_error_fs_probe_fail);
+               goto out;
+       }
+
+responded:
+       set_bit(index, &alist->responded);
+       clear_bit(index, &alist->failed);
+
+       if (call->service_id == YFS_FS_SERVICE) {
+               server->probe.is_yfs = true;
+               set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
+               alist->addrs[index].srx_service = call->service_id;
+       } else {
+               server->probe.not_yfs = true;
+               if (!server->probe.is_yfs) {
+                       clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
+                       alist->addrs[index].srx_service = call->service_id;
+               }
+       }
+
+       /* Get the RTT and scale it to fit into a 32-bit value that represents
+        * over a minute of time so that we can access it with one instruction
+        * on a 32-bit system.
+        */
+       _rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
+       _rtt /= 64;
+       rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt;
+       if (rtt < server->probe.rtt) {
+               server->probe.rtt = rtt;
+               alist->preferred = index;
+               have_result = true;
+       }
+
+       smp_wmb(); /* Set rtt before responded. */
+       server->probe.responded = true;
+       set_bit(AFS_SERVER_FL_PROBED, &server->flags);
+out:
+       spin_unlock(&server->probe_lock);
+
+       _debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
+              server_index, index, &alist->addrs[index].transport,
+              (unsigned int)rtt, ret);
+
+       have_result |= afs_fs_probe_done(server);
+       if (have_result) {
+               server->probe.have_result = true;
+               wake_up_var(&server->probe.have_result);
+               wake_up_all(&server->probe_wq);
+       }
+}
+
+/*
+ * Probe all of a fileserver's addresses to find out the best route and to
+ * query its capabilities.
+ */
+static int afs_do_probe_fileserver(struct afs_net *net,
+                                  struct afs_server *server,
+                                  struct key *key,
+                                  unsigned int server_index)
+{
+       struct afs_addr_cursor ac = {
+               .index = 0,
+       };
+       int ret;
+
+       _enter("%pU", &server->uuid);
+
+       read_lock(&server->fs_lock);
+       ac.alist = rcu_dereference_protected(server->addresses,
+                                            lockdep_is_held(&server->fs_lock));
+       read_unlock(&server->fs_lock);
+
+       atomic_set(&server->probe_outstanding, ac.alist->nr_addrs);
+       memset(&server->probe, 0, sizeof(server->probe));
+       server->probe.rtt = UINT_MAX;
+
+       for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
+               ret = afs_fs_get_capabilities(net, server, &ac, key, server_index,
+                                             true);
+               if (ret != -EINPROGRESS) {
+                       afs_fs_probe_done(server);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Send off probes to all unprobed servers.
+ */
+int afs_probe_fileservers(struct afs_net *net, struct key *key,
+                         struct afs_server_list *list)
+{
+       struct afs_server *server;
+       int i, ret;
+
+       for (i = 0; i < list->nr_servers; i++) {
+               server = list->servers[i].server;
+               if (test_bit(AFS_SERVER_FL_PROBED, &server->flags))
+                       continue;
+
+               if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags)) {
+                       ret = afs_do_probe_fileserver(net, server, key, i);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Wait for the first as-yet untried fileserver to respond.
+ */
+int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
+{
+       struct wait_queue_entry *waits;
+       struct afs_server *server;
+       unsigned int rtt = UINT_MAX;
+       bool have_responders = false;
+       int pref = -1, i;
+
+       _enter("%u,%lx", slist->nr_servers, untried);
+
+       /* Only wait for servers that have a probe outstanding. */
+       for (i = 0; i < slist->nr_servers; i++) {
+               if (test_bit(i, &untried)) {
+                       server = slist->servers[i].server;
+                       if (!test_bit(AFS_SERVER_FL_PROBING, &server->flags))
+                               __clear_bit(i, &untried);
+                       if (server->probe.responded)
+                               have_responders = true;
+               }
+       }
+       if (have_responders || !untried)
+               return 0;
+
+       waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
+       if (!waits)
+               return -ENOMEM;
+
+       for (i = 0; i < slist->nr_servers; i++) {
+               if (test_bit(i, &untried)) {
+                       server = slist->servers[i].server;
+                       init_waitqueue_entry(&waits[i], current);
+                       add_wait_queue(&server->probe_wq, &waits[i]);
+               }
+       }
+
+       for (;;) {
+               bool still_probing = false;
+
+               set_current_state(TASK_INTERRUPTIBLE);
+               for (i = 0; i < slist->nr_servers; i++) {
+                       if (test_bit(i, &untried)) {
+                               server = slist->servers[i].server;
+                               if (server->probe.responded)
+                                       goto stop;
+                               if (test_bit(AFS_SERVER_FL_PROBING, &server->flags))
+                                       still_probing = true;
+                       }
+               }
+
+               if (!still_probing || unlikely(signal_pending(current)))
+                       goto stop;
+               schedule();
+       }
+
+stop:
+       set_current_state(TASK_RUNNING);
+
+       for (i = 0; i < slist->nr_servers; i++) {
+               if (test_bit(i, &untried)) {
+                       server = slist->servers[i].server;
+                       if (server->probe.responded &&
+                           server->probe.rtt < rtt) {
+                               pref = i;
+                               rtt = server->probe.rtt;
+                       }
+
+                       remove_wait_queue(&server->probe_wq, &waits[i]);
+               }
+       }
+
+       kfree(waits);
+
+       if (pref == -1 && signal_pending(current))
+               return -ERESTARTSYS;
+
+       if (pref >= 0)
+               slist->preferred = pref;
+       return 0;
+}
index 50929cb91732f5adec19706788e6a31aeb8beb03..ca08c83168f5fbf1f7f6b52c8c3ff769bf70cf04 100644 (file)
 #include "internal.h"
 #include "afs_fs.h"
 #include "xdr_fs.h"
+#include "protocol_yfs.h"
 
 static const struct afs_fid afs_zero_fid;
 
-/*
- * We need somewhere to discard into in case the server helpfully returns more
- * than we asked for in FS.FetchData{,64}.
- */
-static u8 afs_discard_buffer[64];
-
 static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi)
 {
        call->cbi = afs_get_cb_interest(cbi);
@@ -75,8 +70,7 @@ void afs_update_inode_from_status(struct afs_vnode *vnode,
        struct timespec64 t;
        umode_t mode;
 
-       t.tv_sec = status->mtime_client;
-       t.tv_nsec = 0;
+       t = status->mtime_client;
        vnode->vfs_inode.i_ctime = t;
        vnode->vfs_inode.i_mtime = t;
        vnode->vfs_inode.i_atime = t;
@@ -96,7 +90,7 @@ void afs_update_inode_from_status(struct afs_vnode *vnode,
        if (!(flags & AFS_VNODE_NOT_YET_SET)) {
                if (expected_version &&
                    *expected_version != status->data_version) {
-                       _debug("vnode modified %llx on {%x:%u} [exp %llx]",
+                       _debug("vnode modified %llx on {%llx:%llu} [exp %llx]",
                               (unsigned long long) status->data_version,
                               vnode->fid.vid, vnode->fid.vnode,
                               (unsigned long long) *expected_version);
@@ -170,7 +164,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
                if (type != status->type &&
                    vnode &&
                    !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
-                       pr_warning("Vnode %x:%x:%x changed type %u to %u\n",
+                       pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n",
                                   vnode->fid.vid,
                                   vnode->fid.vnode,
                                   vnode->fid.unique,
@@ -200,8 +194,10 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
        EXTRACT_M(mode);
        EXTRACT_M(group);
 
-       status->mtime_client = ntohl(xdr->mtime_client);
-       status->mtime_server = ntohl(xdr->mtime_server);
+       status->mtime_client.tv_sec = ntohl(xdr->mtime_client);
+       status->mtime_client.tv_nsec = 0;
+       status->mtime_server.tv_sec = ntohl(xdr->mtime_server);
+       status->mtime_server.tv_nsec = 0;
        status->lock_count   = ntohl(xdr->lock_count);
 
        size  = (u64)ntohl(xdr->size_lo);
@@ -233,7 +229,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
 
 bad:
        xdr_dump_bad(*_bp);
-       return afs_protocol_error(call, -EBADMSG);
+       return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
 }
 
 /*
@@ -273,7 +269,7 @@ static void xdr_decode_AFSCallBack(struct afs_call *call,
 
        write_seqlock(&vnode->cb_lock);
 
-       if (call->cb_break == afs_cb_break_sum(vnode, cbi)) {
+       if (!afs_cb_is_broken(call->cb_break, vnode, cbi)) {
                vnode->cb_version       = ntohl(*bp++);
                cb_expiry               = ntohl(*bp++);
                vnode->cb_type          = ntohl(*bp++);
@@ -293,13 +289,19 @@ static void xdr_decode_AFSCallBack(struct afs_call *call,
        *_bp = bp;
 }
 
-static void xdr_decode_AFSCallBack_raw(const __be32 **_bp,
+static ktime_t xdr_decode_expiry(struct afs_call *call, u32 expiry)
+{
+       return ktime_add_ns(call->reply_time, expiry * NSEC_PER_SEC);
+}
+
+static void xdr_decode_AFSCallBack_raw(struct afs_call *call,
+                                      const __be32 **_bp,
                                       struct afs_callback *cb)
 {
        const __be32 *bp = *_bp;
 
        cb->version     = ntohl(*bp++);
-       cb->expiry      = ntohl(*bp++);
+       cb->expires_at  = xdr_decode_expiry(call, ntohl(*bp++));
        cb->type        = ntohl(*bp++);
        *_bp = bp;
 }
@@ -311,14 +313,18 @@ static void xdr_decode_AFSVolSync(const __be32 **_bp,
                                  struct afs_volsync *volsync)
 {
        const __be32 *bp = *_bp;
+       u32 creation;
 
-       volsync->creation = ntohl(*bp++);
+       creation = ntohl(*bp++);
        bp++; /* spare2 */
        bp++; /* spare3 */
        bp++; /* spare4 */
        bp++; /* spare5 */
        bp++; /* spare6 */
        *_bp = bp;
+
+       if (volsync)
+               volsync->creation = creation;
 }
 
 /*
@@ -379,6 +385,8 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp,
        vs->blocks_in_use       = ntohl(*bp++);
        vs->part_blocks_avail   = ntohl(*bp++);
        vs->part_max_blocks     = ntohl(*bp++);
+       vs->vol_copy_date       = 0;
+       vs->vol_backup_date     = 0;
        *_bp = bp;
 }
 
@@ -395,16 +403,16 @@ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call)
        if (ret < 0)
                return ret;
 
-       _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+       _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
-       if (afs_decode_status(call, &bp, &vnode->status, vnode,
-                             &call->expected_version, NULL) < 0)
-               return afs_protocol_error(call, -EBADMSG);
+       ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
        xdr_decode_AFSCallBack(call, vnode, &bp);
-       if (call->reply[1])
-               xdr_decode_AFSVolSync(&bp, call->reply[1]);
+       xdr_decode_AFSVolSync(&bp, call->reply[1]);
 
        _leave(" = 0 [done]");
        return 0;
@@ -431,7 +439,10 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
        struct afs_net *net = afs_v2net(vnode);
        __be32 *bp;
 
-       _enter(",%x,{%x:%u},,",
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_fetch_file_status(fc, volsync, new_inode);
+
+       _enter(",%x,{%llx:%llu},,",
               key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
        call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus_vnode,
@@ -445,6 +456,7 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
        call->reply[0] = vnode;
        call->reply[1] = volsync;
        call->expected_version = new_inode ? 1 : vnode->status.data_version;
+       call->want_reply_time = true;
 
        /* marshall the parameters */
        bp = call->request;
@@ -468,139 +480,117 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
        struct afs_read *req = call->reply[2];
        const __be32 *bp;
        unsigned int size;
-       void *buffer;
        int ret;
 
-       _enter("{%u,%zu/%u;%llu/%llu}",
-              call->unmarshall, call->offset, call->count,
-              req->remain, req->actual_len);
+       _enter("{%u,%zu/%llu}",
+              call->unmarshall, iov_iter_count(&call->iter), req->actual_len);
 
        switch (call->unmarshall) {
        case 0:
                req->actual_len = 0;
-               call->offset = 0;
+               req->index = 0;
+               req->offset = req->pos & (PAGE_SIZE - 1);
                call->unmarshall++;
-               if (call->operation_ID != FSFETCHDATA64) {
-                       call->unmarshall++;
-                       goto no_msw;
+               if (call->operation_ID == FSFETCHDATA64) {
+                       afs_extract_to_tmp64(call);
+               } else {
+                       call->tmp_u = htonl(0);
+                       afs_extract_to_tmp(call);
                }
 
-               /* extract the upper part of the returned data length of an
-                * FSFETCHDATA64 op (which should always be 0 using this
-                * client) */
-       case 1:
-               _debug("extract data length (MSW)");
-               ret = afs_extract_data(call, &call->tmp, 4, true);
-               if (ret < 0)
-                       return ret;
-
-               req->actual_len = ntohl(call->tmp);
-               req->actual_len <<= 32;
-               call->offset = 0;
-               call->unmarshall++;
-
-       no_msw:
                /* extract the returned data length */
-       case 2:
+       case 1:
                _debug("extract data length");
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
-               req->actual_len |= ntohl(call->tmp);
+               req->actual_len = be64_to_cpu(call->tmp64);
                _debug("DATA length: %llu", req->actual_len);
-
-               req->remain = req->actual_len;
-               call->offset = req->pos & (PAGE_SIZE - 1);
-               req->index = 0;
-               if (req->actual_len == 0)
+               req->remain = min(req->len, req->actual_len);
+               if (req->remain == 0)
                        goto no_more_data;
+
                call->unmarshall++;
 
        begin_page:
                ASSERTCMP(req->index, <, req->nr_pages);
-               if (req->remain > PAGE_SIZE - call->offset)
-                       size = PAGE_SIZE - call->offset;
+               if (req->remain > PAGE_SIZE - req->offset)
+                       size = PAGE_SIZE - req->offset;
                else
                        size = req->remain;
-               call->count = call->offset + size;
-               ASSERTCMP(call->count, <=, PAGE_SIZE);
-               req->remain -= size;
+               call->bvec[0].bv_len = size;
+               call->bvec[0].bv_offset = req->offset;
+               call->bvec[0].bv_page = req->pages[req->index];
+               iov_iter_bvec(&call->iter, READ, call->bvec, 1, size);
+               ASSERTCMP(size, <=, PAGE_SIZE);
 
                /* extract the returned data */
-       case 3:
-               _debug("extract data %llu/%llu %zu/%u",
-                      req->remain, req->actual_len, call->offset, call->count);
+       case 2:
+               _debug("extract data %zu/%llu",
+                      iov_iter_count(&call->iter), req->remain);
 
-               buffer = kmap(req->pages[req->index]);
-               ret = afs_extract_data(call, buffer, call->count, true);
-               kunmap(req->pages[req->index]);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
-               if (call->offset == PAGE_SIZE) {
+               req->remain -= call->bvec[0].bv_len;
+               req->offset += call->bvec[0].bv_len;
+               ASSERTCMP(req->offset, <=, PAGE_SIZE);
+               if (req->offset == PAGE_SIZE) {
+                       req->offset = 0;
                        if (req->page_done)
                                req->page_done(call, req);
                        req->index++;
-                       if (req->remain > 0) {
-                               call->offset = 0;
-                               if (req->index >= req->nr_pages) {
-                                       call->unmarshall = 4;
-                                       goto begin_discard;
-                               }
+                       if (req->remain > 0)
                                goto begin_page;
-                       }
                }
-               goto no_more_data;
+
+               ASSERTCMP(req->remain, ==, 0);
+               if (req->actual_len <= req->len)
+                       goto no_more_data;
 
                /* Discard any excess data the server gave us */
-       begin_discard:
-       case 4:
-               size = min_t(loff_t, sizeof(afs_discard_buffer), req->remain);
-               call->count = size;
-               _debug("extract discard %llu/%llu %zu/%u",
-                      req->remain, req->actual_len, call->offset, call->count);
-
-               call->offset = 0;
-               ret = afs_extract_data(call, afs_discard_buffer, call->count, true);
-               req->remain -= call->offset;
+               iov_iter_discard(&call->iter, READ, req->actual_len - req->len);
+               call->unmarshall = 3;
+       case 3:
+               _debug("extract discard %zu/%llu",
+                      iov_iter_count(&call->iter), req->actual_len - req->len);
+
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
-               if (req->remain > 0)
-                       goto begin_discard;
 
        no_more_data:
-               call->offset = 0;
-               call->unmarshall = 5;
+               call->unmarshall = 4;
+               afs_extract_to_buf(call, (21 + 3 + 6) * 4);
 
                /* extract the metadata */
-       case 5:
-               ret = afs_extract_data(call, call->buffer,
-                                      (21 + 3 + 6) * 4, false);
+       case 4:
+               ret = afs_extract_data(call, false);
                if (ret < 0)
                        return ret;
 
                bp = call->buffer;
-               if (afs_decode_status(call, &bp, &vnode->status, vnode,
-                                     &vnode->status.data_version, req) < 0)
-                       return afs_protocol_error(call, -EBADMSG);
+               ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+                                       &vnode->status.data_version, req);
+               if (ret < 0)
+                       return ret;
                xdr_decode_AFSCallBack(call, vnode, &bp);
-               if (call->reply[1])
-                       xdr_decode_AFSVolSync(&bp, call->reply[1]);
+               xdr_decode_AFSVolSync(&bp, call->reply[1]);
 
-               call->offset = 0;
                call->unmarshall++;
 
-       case 6:
+       case 5:
                break;
        }
 
        for (; req->index < req->nr_pages; req->index++) {
-               if (call->count < PAGE_SIZE)
+               if (req->offset < PAGE_SIZE)
                        zero_user_segment(req->pages[req->index],
-                                         call->count, PAGE_SIZE);
+                                         req->offset, PAGE_SIZE);
                if (req->page_done)
                        req->page_done(call, req);
-               call->count = 0;
+               req->offset = 0;
        }
 
        _leave(" = 0 [done]");
@@ -653,6 +643,7 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req)
        call->reply[1] = NULL; /* volsync */
        call->reply[2] = req;
        call->expected_version = vnode->status.data_version;
+       call->want_reply_time = true;
 
        /* marshall the parameters */
        bp = call->request;
@@ -682,6 +673,9 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
        struct afs_net *net = afs_v2net(vnode);
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_fetch_data(fc, req);
+
        if (upper_32_bits(req->pos) ||
            upper_32_bits(req->len) ||
            upper_32_bits(req->pos + req->len))
@@ -698,6 +692,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
        call->reply[1] = NULL; /* volsync */
        call->reply[2] = req;
        call->expected_version = vnode->status.data_version;
+       call->want_reply_time = true;
 
        /* marshall the parameters */
        bp = call->request;
@@ -733,11 +728,14 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call)
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
        xdr_decode_AFSFid(&bp, call->reply[1]);
-       if (afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL) < 0 ||
-           afs_decode_status(call, &bp, &vnode->status, vnode,
-                             &call->expected_version, NULL) < 0)
-               return afs_protocol_error(call, -EBADMSG);
-       xdr_decode_AFSCallBack_raw(&bp, call->reply[3]);
+       ret = afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+       if (ret < 0)
+               return ret;
+       ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
+       xdr_decode_AFSCallBack_raw(call, &bp, call->reply[3]);
        /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
        _leave(" = 0 [done]");
@@ -778,6 +776,15 @@ int afs_fs_create(struct afs_fs_cursor *fc,
        size_t namesz, reqsz, padsz;
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)){
+               if (S_ISDIR(mode))
+                       return yfs_fs_make_dir(fc, name, mode, current_data_version,
+                                              newfid, newstatus, newcb);
+               else
+                       return yfs_fs_create_file(fc, name, mode, current_data_version,
+                                                 newfid, newstatus, newcb);
+       }
+
        _enter("");
 
        namesz = strlen(name);
@@ -796,6 +803,7 @@ int afs_fs_create(struct afs_fs_cursor *fc,
        call->reply[2] = newstatus;
        call->reply[3] = newcb;
        call->expected_version = current_data_version + 1;
+       call->want_reply_time = true;
 
        /* marshall the parameters */
        bp = call->request;
@@ -839,9 +847,10 @@ static int afs_deliver_fs_remove(struct afs_call *call)
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
-       if (afs_decode_status(call, &bp, &vnode->status, vnode,
-                             &call->expected_version, NULL) < 0)
-               return afs_protocol_error(call, -EBADMSG);
+       ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
        /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
        _leave(" = 0 [done]");
@@ -868,15 +877,18 @@ static const struct afs_call_type afs_RXFSRemoveDir = {
 /*
  * remove a file or directory
  */
-int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir,
-                 u64 current_data_version)
+int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
+                 const char *name, bool isdir, u64 current_data_version)
 {
-       struct afs_vnode *vnode = fc->vnode;
+       struct afs_vnode *dvnode = fc->vnode;
        struct afs_call *call;
-       struct afs_net *net = afs_v2net(vnode);
+       struct afs_net *net = afs_v2net(dvnode);
        size_t namesz, reqsz, padsz;
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_remove(fc, vnode, name, isdir, current_data_version);
+
        _enter("");
 
        namesz = strlen(name);
@@ -890,15 +902,16 @@ int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir,
                return -ENOMEM;
 
        call->key = fc->key;
-       call->reply[0] = vnode;
+       call->reply[0] = dvnode;
+       call->reply[1] = vnode;
        call->expected_version = current_data_version + 1;
 
        /* marshall the parameters */
        bp = call->request;
        *bp++ = htonl(isdir ? FSREMOVEDIR : FSREMOVEFILE);
-       *bp++ = htonl(vnode->fid.vid);
-       *bp++ = htonl(vnode->fid.vnode);
-       *bp++ = htonl(vnode->fid.unique);
+       *bp++ = htonl(dvnode->fid.vid);
+       *bp++ = htonl(dvnode->fid.vnode);
+       *bp++ = htonl(dvnode->fid.unique);
        *bp++ = htonl(namesz);
        memcpy(bp, name, namesz);
        bp = (void *) bp + namesz;
@@ -908,7 +921,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir,
        }
 
        afs_use_fs_server(call, fc->cbi);
-       trace_afs_make_fs_call(call, &vnode->fid);
+       trace_afs_make_fs_call(call, &dvnode->fid);
        return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -929,10 +942,13 @@ static int afs_deliver_fs_link(struct afs_call *call)
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
-       if (afs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL) < 0 ||
-           afs_decode_status(call, &bp, &dvnode->status, dvnode,
-                             &call->expected_version, NULL) < 0)
-               return afs_protocol_error(call, -EBADMSG);
+       ret = afs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL);
+       if (ret < 0)
+               return ret;
+       ret = afs_decode_status(call, &bp, &dvnode->status, dvnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
        /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
        _leave(" = 0 [done]");
@@ -961,6 +977,9 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
        size_t namesz, reqsz, padsz;
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_link(fc, vnode, name, current_data_version);
+
        _enter("");
 
        namesz = strlen(name);
@@ -1016,10 +1035,13 @@ static int afs_deliver_fs_symlink(struct afs_call *call)
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
        xdr_decode_AFSFid(&bp, call->reply[1]);
-       if (afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL) ||
-           afs_decode_status(call, &bp, &vnode->status, vnode,
-                             &call->expected_version, NULL) < 0)
-               return afs_protocol_error(call, -EBADMSG);
+       ret = afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+       if (ret < 0)
+               return ret;
+       ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
        /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
        _leave(" = 0 [done]");
@@ -1052,6 +1074,10 @@ int afs_fs_symlink(struct afs_fs_cursor *fc,
        size_t namesz, reqsz, padsz, c_namesz, c_padsz;
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_symlink(fc, name, contents, current_data_version,
+                                     newfid, newstatus);
+
        _enter("");
 
        namesz = strlen(name);
@@ -1122,13 +1148,16 @@ static int afs_deliver_fs_rename(struct afs_call *call)
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
-       if (afs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode,
-                             &call->expected_version, NULL) < 0)
-               return afs_protocol_error(call, -EBADMSG);
-       if (new_dvnode != orig_dvnode &&
-           afs_decode_status(call, &bp, &new_dvnode->status, new_dvnode,
-                             &call->expected_version_2, NULL) < 0)
-               return afs_protocol_error(call, -EBADMSG);
+       ret = afs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
+       if (new_dvnode != orig_dvnode) {
+               ret = afs_decode_status(call, &bp, &new_dvnode->status, new_dvnode,
+                                       &call->expected_version_2, NULL);
+               if (ret < 0)
+                       return ret;
+       }
        /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
        _leave(" = 0 [done]");
@@ -1161,6 +1190,12 @@ int afs_fs_rename(struct afs_fs_cursor *fc,
        size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz;
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_rename(fc, orig_name,
+                                    new_dvnode, new_name,
+                                    current_orig_data_version,
+                                    current_new_data_version);
+
        _enter("");
 
        o_namesz = strlen(orig_name);
@@ -1231,9 +1266,10 @@ static int afs_deliver_fs_store_data(struct afs_call *call)
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
-       if (afs_decode_status(call, &bp, &vnode->status, vnode,
-                             &call->expected_version, NULL) < 0)
-               return afs_protocol_error(call, -EBADMSG);
+       ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
        /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
        afs_pages_written_back(vnode, call);
@@ -1273,7 +1309,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc,
        struct afs_net *net = afs_v2net(vnode);
        __be32 *bp;
 
-       _enter(",%x,{%x:%u},,",
+       _enter(",%x,{%llx:%llu},,",
               key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
        call = afs_alloc_flat_call(net, &afs_RXFSStoreData64,
@@ -1330,7 +1366,10 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
        loff_t size, pos, i_size;
        __be32 *bp;
 
-       _enter(",%x,{%x:%u},,",
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_store_data(fc, mapping, first, last, offset, to);
+
+       _enter(",%x,{%llx:%llu},,",
               key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
        size = (loff_t)to - (loff_t)offset;
@@ -1407,9 +1446,10 @@ static int afs_deliver_fs_store_status(struct afs_call *call)
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
-       if (afs_decode_status(call, &bp, &vnode->status, vnode,
-                             &call->expected_version, NULL) < 0)
-               return afs_protocol_error(call, -EBADMSG);
+       ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
        /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
        _leave(" = 0 [done]");
@@ -1451,7 +1491,7 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr)
        struct afs_net *net = afs_v2net(vnode);
        __be32 *bp;
 
-       _enter(",%x,{%x:%u},,",
+       _enter(",%x,{%llx:%llu},,",
               key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
        ASSERT(attr->ia_valid & ATTR_SIZE);
@@ -1498,7 +1538,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
        struct afs_net *net = afs_v2net(vnode);
        __be32 *bp;
 
-       _enter(",%x,{%x:%u},,",
+       _enter(",%x,{%llx:%llu},,",
               key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
        ASSERT(attr->ia_valid & ATTR_SIZE);
@@ -1544,10 +1584,13 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
        struct afs_net *net = afs_v2net(vnode);
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_setattr(fc, attr);
+
        if (attr->ia_valid & ATTR_SIZE)
                return afs_fs_setattr_size(fc, attr);
 
-       _enter(",%x,{%x:%u},,",
+       _enter(",%x,{%llx:%llu},,",
               key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
        call = afs_alloc_flat_call(net, &afs_RXFSStoreStatus,
@@ -1581,164 +1624,114 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 {
        const __be32 *bp;
        char *p;
+       u32 size;
        int ret;
 
        _enter("{%u}", call->unmarshall);
 
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
                call->unmarshall++;
+               afs_extract_to_buf(call, 12 * 4);
 
                /* extract the returned status record */
        case 1:
                _debug("extract status");
-               ret = afs_extract_data(call, call->buffer,
-                                      12 * 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                bp = call->buffer;
                xdr_decode_AFSFetchVolumeStatus(&bp, call->reply[1]);
-               call->offset = 0;
                call->unmarshall++;
+               afs_extract_to_tmp(call);
 
                /* extract the volume name length */
        case 2:
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                call->count = ntohl(call->tmp);
                _debug("volname length: %u", call->count);
                if (call->count >= AFSNAMEMAX)
-                       return afs_protocol_error(call, -EBADMSG);
-               call->offset = 0;
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_volname_len);
+               size = (call->count + 3) & ~3; /* It's padded */
+               afs_extract_begin(call, call->reply[2], size);
                call->unmarshall++;
 
                /* extract the volume name */
        case 3:
                _debug("extract volname");
-               if (call->count > 0) {
-                       ret = afs_extract_data(call, call->reply[2],
-                                              call->count, true);
-                       if (ret < 0)
-                               return ret;
-               }
+               ret = afs_extract_data(call, true);
+               if (ret < 0)
+                       return ret;
 
                p = call->reply[2];
                p[call->count] = 0;
                _debug("volname '%s'", p);
-
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
 
-               /* extract the volume name padding */
-               if ((call->count & 3) == 0) {
-                       call->unmarshall++;
-                       goto no_volname_padding;
-               }
-               call->count = 4 - (call->count & 3);
-
-       case 4:
-               ret = afs_extract_data(call, call->buffer,
-                                      call->count, true);
-               if (ret < 0)
-                       return ret;
-
-               call->offset = 0;
-               call->unmarshall++;
-       no_volname_padding:
-
                /* extract the offline message length */
-       case 5:
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+       case 4:
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                call->count = ntohl(call->tmp);
                _debug("offline msg length: %u", call->count);
                if (call->count >= AFSNAMEMAX)
-                       return afs_protocol_error(call, -EBADMSG);
-               call->offset = 0;
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_offline_msg_len);
+               size = (call->count + 3) & ~3; /* It's padded */
+               afs_extract_begin(call, call->reply[2], size);
                call->unmarshall++;
 
                /* extract the offline message */
-       case 6:
+       case 5:
                _debug("extract offline");
-               if (call->count > 0) {
-                       ret = afs_extract_data(call, call->reply[2],
-                                              call->count, true);
-                       if (ret < 0)
-                               return ret;
-               }
+               ret = afs_extract_data(call, true);
+               if (ret < 0)
+                       return ret;
 
                p = call->reply[2];
                p[call->count] = 0;
                _debug("offline '%s'", p);
 
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
 
-               /* extract the offline message padding */
-               if ((call->count & 3) == 0) {
-                       call->unmarshall++;
-                       goto no_offline_padding;
-               }
-               call->count = 4 - (call->count & 3);
-
-       case 7:
-               ret = afs_extract_data(call, call->buffer,
-                                      call->count, true);
-               if (ret < 0)
-                       return ret;
-
-               call->offset = 0;
-               call->unmarshall++;
-       no_offline_padding:
-
                /* extract the message of the day length */
-       case 8:
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+       case 6:
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                call->count = ntohl(call->tmp);
                _debug("motd length: %u", call->count);
                if (call->count >= AFSNAMEMAX)
-                       return afs_protocol_error(call, -EBADMSG);
-               call->offset = 0;
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_motd_len);
+               size = (call->count + 3) & ~3; /* It's padded */
+               afs_extract_begin(call, call->reply[2], size);
                call->unmarshall++;
 
                /* extract the message of the day */
-       case 9:
+       case 7:
                _debug("extract motd");
-               if (call->count > 0) {
-                       ret = afs_extract_data(call, call->reply[2],
-                                              call->count, true);
-                       if (ret < 0)
-                               return ret;
-               }
+               ret = afs_extract_data(call, false);
+               if (ret < 0)
+                       return ret;
 
                p = call->reply[2];
                p[call->count] = 0;
                _debug("motd '%s'", p);
 
-               call->offset = 0;
                call->unmarshall++;
 
-               /* extract the message of the day padding */
-               call->count = (4 - (call->count & 3)) & 3;
-
-       case 10:
-               ret = afs_extract_data(call, call->buffer,
-                                      call->count, false);
-               if (ret < 0)
-                       return ret;
-
-               call->offset = 0;
-               call->unmarshall++;
-       case 11:
+       case 8:
                break;
        }
 
@@ -1778,6 +1771,9 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc,
        __be32 *bp;
        void *tmpbuf;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_get_volume_status(fc, vs);
+
        _enter("");
 
        tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL);
@@ -1867,6 +1863,9 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
        struct afs_net *net = afs_v2net(vnode);
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_set_lock(fc, type);
+
        _enter("");
 
        call = afs_alloc_flat_call(net, &afs_RXFSSetLock, 5 * 4, 6 * 4);
@@ -1899,6 +1898,9 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc)
        struct afs_net *net = afs_v2net(vnode);
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_extend_lock(fc);
+
        _enter("");
 
        call = afs_alloc_flat_call(net, &afs_RXFSExtendLock, 4 * 4, 6 * 4);
@@ -1930,6 +1932,9 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc)
        struct afs_net *net = afs_v2net(vnode);
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_release_lock(fc);
+
        _enter("");
 
        call = afs_alloc_flat_call(net, &afs_RXFSReleaseLock, 4 * 4, 6 * 4);
@@ -2004,19 +2009,16 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
        u32 count;
        int ret;
 
-       _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+       _enter("{%u,%zu}", call->unmarshall, iov_iter_count(&call->iter));
 
-again:
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
 
                /* Extract the capabilities word count */
        case 1:
-               ret = afs_extract_data(call, &call->tmp,
-                                      1 * sizeof(__be32),
-                                      true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -2024,24 +2026,17 @@ again:
 
                call->count = count;
                call->count2 = count;
-               call->offset = 0;
+               iov_iter_discard(&call->iter, READ, count * sizeof(__be32));
                call->unmarshall++;
 
                /* Extract capabilities words */
        case 2:
-               count = min(call->count, 16U);
-               ret = afs_extract_data(call, call->buffer,
-                                      count * sizeof(__be32),
-                                      call->count > 16);
+               ret = afs_extract_data(call, false);
                if (ret < 0)
                        return ret;
 
                /* TODO: Examine capabilities */
 
-               call->count -= count;
-               if (call->count > 0)
-                       goto again;
-               call->offset = 0;
                call->unmarshall++;
                break;
        }
@@ -2050,6 +2045,14 @@ again:
        return 0;
 }
 
+static void afs_destroy_fs_get_capabilities(struct afs_call *call)
+{
+       struct afs_server *server = call->reply[0];
+
+       afs_put_server(call->net, server);
+       afs_flat_call_destructor(call);
+}
+
 /*
  * FS.GetCapabilities operation type
  */
@@ -2057,7 +2060,8 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
        .name           = "FS.GetCapabilities",
        .op             = afs_FS_GetCapabilities,
        .deliver        = afs_deliver_fs_get_capabilities,
-       .destructor     = afs_flat_call_destructor,
+       .done           = afs_fileserver_probe_result,
+       .destructor     = afs_destroy_fs_get_capabilities,
 };
 
 /*
@@ -2067,7 +2071,9 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
 int afs_fs_get_capabilities(struct afs_net *net,
                            struct afs_server *server,
                            struct afs_addr_cursor *ac,
-                           struct key *key)
+                           struct key *key,
+                           unsigned int server_index,
+                           bool async)
 {
        struct afs_call *call;
        __be32 *bp;
@@ -2079,6 +2085,10 @@ int afs_fs_get_capabilities(struct afs_net *net,
                return -ENOMEM;
 
        call->key = key;
+       call->reply[0] = afs_get_server(server);
+       call->reply[1] = (void *)(long)server_index;
+       call->upgrade = true;
+       call->want_reply_time = true;
 
        /* marshall the parameters */
        bp = call->request;
@@ -2086,7 +2096,7 @@ int afs_fs_get_capabilities(struct afs_net *net,
 
        /* Can't take a ref on server */
        trace_afs_make_fs_call(call, NULL);
-       return afs_make_call(ac, call, GFP_NOFS, false);
+       return afs_make_call(ac, call, GFP_NOFS, async);
 }
 
 /*
@@ -2097,7 +2107,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
        struct afs_file_status *status = call->reply[1];
        struct afs_callback *callback = call->reply[2];
        struct afs_volsync *volsync = call->reply[3];
-       struct afs_vnode *vnode = call->reply[0];
+       struct afs_fid *fid = call->reply[0];
        const __be32 *bp;
        int ret;
 
@@ -2105,21 +2115,16 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
        if (ret < 0)
                return ret;
 
-       _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+       _enter("{%llx:%llu}", fid->vid, fid->vnode);
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
-       afs_decode_status(call, &bp, status, vnode,
-                         &call->expected_version, NULL);
-       callback[call->count].version   = ntohl(bp[0]);
-       callback[call->count].expiry    = ntohl(bp[1]);
-       callback[call->count].type      = ntohl(bp[2]);
-       if (vnode)
-               xdr_decode_AFSCallBack(call, vnode, &bp);
-       else
-               bp += 3;
-       if (volsync)
-               xdr_decode_AFSVolSync(&bp, volsync);
+       ret = afs_decode_status(call, &bp, status, NULL,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
+       xdr_decode_AFSCallBack_raw(call, &bp, callback);
+       xdr_decode_AFSVolSync(&bp, volsync);
 
        _leave(" = 0 [done]");
        return 0;
@@ -2148,7 +2153,10 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc,
        struct afs_call *call;
        __be32 *bp;
 
-       _enter(",%x,{%x:%u},,",
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_fetch_status(fc, net, fid, status, callback, volsync);
+
+       _enter(",%x,{%llx:%llu},,",
               key_serial(fc->key), fid->vid, fid->vnode);
 
        call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4);
@@ -2158,11 +2166,12 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc,
        }
 
        call->key = fc->key;
-       call->reply[0] = NULL; /* vnode for fid[0] */
+       call->reply[0] = fid;
        call->reply[1] = status;
        call->reply[2] = callback;
        call->reply[3] = volsync;
        call->expected_version = 1; /* vnode->status.data_version */
+       call->want_reply_time = true;
 
        /* marshall the parameters */
        bp = call->request;
@@ -2193,38 +2202,40 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
 
                /* Extract the file status count and array in two steps */
        case 1:
                _debug("extract status count");
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                tmp = ntohl(call->tmp);
                _debug("status count: %u/%u", tmp, call->count2);
                if (tmp != call->count2)
-                       return afs_protocol_error(call, -EBADMSG);
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_ibulkst_count);
 
                call->count = 0;
                call->unmarshall++;
        more_counts:
-               call->offset = 0;
+               afs_extract_to_buf(call, 21 * sizeof(__be32));
 
        case 2:
                _debug("extract status array %u", call->count);
-               ret = afs_extract_data(call, call->buffer, 21 * 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                bp = call->buffer;
                statuses = call->reply[1];
-               if (afs_decode_status(call, &bp, &statuses[call->count],
-                                     call->count == 0 ? vnode : NULL,
-                                     NULL, NULL) < 0)
-                       return afs_protocol_error(call, -EBADMSG);
+               ret = afs_decode_status(call, &bp, &statuses[call->count],
+                                       call->count == 0 ? vnode : NULL,
+                                       NULL, NULL);
+               if (ret < 0)
+                       return ret;
 
                call->count++;
                if (call->count < call->count2)
@@ -2232,27 +2243,28 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 
                call->count = 0;
                call->unmarshall++;
-               call->offset = 0;
+               afs_extract_to_tmp(call);
 
                /* Extract the callback count and array in two steps */
        case 3:
                _debug("extract CB count");
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                tmp = ntohl(call->tmp);
                _debug("CB count: %u", tmp);
                if (tmp != call->count2)
-                       return afs_protocol_error(call, -EBADMSG);
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_ibulkst_cb_count);
                call->count = 0;
                call->unmarshall++;
        more_cbs:
-               call->offset = 0;
+               afs_extract_to_buf(call, 3 * sizeof(__be32));
 
        case 4:
                _debug("extract CB array");
-               ret = afs_extract_data(call, call->buffer, 3 * 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -2260,7 +2272,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
                bp = call->buffer;
                callbacks = call->reply[2];
                callbacks[call->count].version  = ntohl(bp[0]);
-               callbacks[call->count].expiry   = ntohl(bp[1]);
+               callbacks[call->count].expires_at = xdr_decode_expiry(call, ntohl(bp[1]));
                callbacks[call->count].type     = ntohl(bp[2]);
                statuses = call->reply[1];
                if (call->count == 0 && vnode && statuses[0].abort_code == 0)
@@ -2269,19 +2281,17 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
                if (call->count < call->count2)
                        goto more_cbs;
 
-               call->offset = 0;
+               afs_extract_to_buf(call, 6 * sizeof(__be32));
                call->unmarshall++;
 
        case 5:
-               ret = afs_extract_data(call, call->buffer, 6 * 4, false);
+               ret = afs_extract_data(call, false);
                if (ret < 0)
                        return ret;
 
                bp = call->buffer;
-               if (call->reply[3])
-                       xdr_decode_AFSVolSync(&bp, call->reply[3]);
+               xdr_decode_AFSVolSync(&bp, call->reply[3]);
 
-               call->offset = 0;
                call->unmarshall++;
 
        case 6:
@@ -2317,7 +2327,11 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
        __be32 *bp;
        int i;
 
-       _enter(",%x,{%x:%u},%u",
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_inline_bulk_status(fc, net, fids, statuses, callbacks,
+                                                nr_fids, volsync);
+
+       _enter(",%x,{%llx:%llu},%u",
               key_serial(fc->key), fids[0].vid, fids[1].vnode, nr_fids);
 
        call = afs_alloc_flat_call(net, &afs_RXFSInlineBulkStatus,
@@ -2334,6 +2348,7 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
        call->reply[2] = callbacks;
        call->reply[3] = volsync;
        call->count2 = nr_fids;
+       call->want_reply_time = true;
 
        /* marshall the parameters */
        bp = call->request;
index 479b7fdda1244f5bf210694e275826cba99b5553..4c6d8e1112c2b716ef788afbb31635150af3f87b 100644 (file)
@@ -82,7 +82,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key)
        default:
                printk("kAFS: AFS vnode with undefined type\n");
                read_sequnlock_excl(&vnode->cb_lock);
-               return afs_protocol_error(NULL, -EBADMSG);
+               return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type);
        }
 
        inode->i_blocks         = 0;
@@ -100,7 +100,7 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode)
        struct afs_fs_cursor fc;
        int ret;
 
-       _enter("%s,{%x:%u.%u,S=%lx}",
+       _enter("%s,{%llx:%llu.%u,S=%lx}",
               vnode->volume->name,
               vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
               vnode->flags);
@@ -127,9 +127,9 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode)
 int afs_iget5_test(struct inode *inode, void *opaque)
 {
        struct afs_iget_data *data = opaque;
+       struct afs_vnode *vnode = AFS_FS_I(inode);
 
-       return inode->i_ino == data->fid.vnode &&
-               inode->i_generation == data->fid.unique;
+       return memcmp(&vnode->fid, &data->fid, sizeof(data->fid)) == 0;
 }
 
 /*
@@ -150,11 +150,14 @@ static int afs_iget5_set(struct inode *inode, void *opaque)
        struct afs_iget_data *data = opaque;
        struct afs_vnode *vnode = AFS_FS_I(inode);
 
-       inode->i_ino = data->fid.vnode;
-       inode->i_generation = data->fid.unique;
        vnode->fid = data->fid;
        vnode->volume = data->volume;
 
+       /* YFS supports 96-bit vnode IDs, but Linux only supports
+        * 64-bit inode numbers.
+        */
+       inode->i_ino = data->fid.vnode;
+       inode->i_generation = data->fid.unique;
        return 0;
 }
 
@@ -193,7 +196,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
                return ERR_PTR(-ENOMEM);
        }
 
-       _debug("GOT INODE %p { ino=%lu, vl=%x, vn=%x, u=%x }",
+       _debug("GOT INODE %p { ino=%lu, vl=%llx, vn=%llx, u=%x }",
               inode, inode->i_ino, data.fid.vid, data.fid.vnode,
               data.fid.unique);
 
@@ -252,8 +255,8 @@ static void afs_get_inode_cache(struct afs_vnode *vnode)
 
        key.vnode_id            = vnode->fid.vnode;
        key.unique              = vnode->fid.unique;
-       key.vnode_id_ext[0]     = 0;
-       key.vnode_id_ext[1]     = 0;
+       key.vnode_id_ext[0]     = vnode->fid.vnode >> 32;
+       key.vnode_id_ext[1]     = vnode->fid.vnode_hi;
        aux.data_version        = vnode->status.data_version;
 
        vnode->cache = fscache_acquire_cookie(vnode->volume->cache,
@@ -277,7 +280,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
        struct inode *inode;
        int ret;
 
-       _enter(",{%x:%u.%u},,", fid->vid, fid->vnode, fid->unique);
+       _enter(",{%llx:%llu.%u},,", fid->vid, fid->vnode, fid->unique);
 
        as = sb->s_fs_info;
        data.volume = as->volume;
@@ -289,7 +292,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
                return ERR_PTR(-ENOMEM);
        }
 
-       _debug("GOT INODE %p { vl=%x vn=%x, u=%x }",
+       _debug("GOT INODE %p { vl=%llx vn=%llx, u=%x }",
               inode, fid->vid, fid->vnode, fid->unique);
 
        vnode = AFS_FS_I(inode);
@@ -314,11 +317,11 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
                         * didn't give us a callback) */
                        vnode->cb_version = 0;
                        vnode->cb_type = 0;
-                       vnode->cb_expires_at = 0;
+                       vnode->cb_expires_at = ktime_get();
                } else {
                        vnode->cb_version = cb->version;
                        vnode->cb_type = cb->type;
-                       vnode->cb_expires_at = cb->expiry;
+                       vnode->cb_expires_at = cb->expires_at;
                        vnode->cb_interest = afs_get_cb_interest(cbi);
                        set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
                }
@@ -352,7 +355,7 @@ bad_inode:
  */
 void afs_zap_data(struct afs_vnode *vnode)
 {
-       _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+       _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
 
 #ifdef CONFIG_AFS_FSCACHE
        fscache_invalidate(vnode->cache);
@@ -382,7 +385,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
        bool valid = false;
        int ret;
 
-       _enter("{v={%x:%u} fl=%lx},%x",
+       _enter("{v={%llx:%llu} fl=%lx},%x",
               vnode->fid.vid, vnode->fid.vnode, vnode->flags,
               key_serial(key));
 
@@ -501,7 +504,7 @@ void afs_evict_inode(struct inode *inode)
 
        vnode = AFS_FS_I(inode);
 
-       _enter("{%x:%u.%d}",
+       _enter("{%llx:%llu.%d}",
               vnode->fid.vid,
               vnode->fid.vnode,
               vnode->fid.unique);
@@ -550,7 +553,7 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
        struct key *key;
        int ret;
 
-       _enter("{%x:%u},{n=%pd},%x",
+       _enter("{%llx:%llu},{n=%pd},%x",
               vnode->fid.vid, vnode->fid.vnode, dentry,
               attr->ia_valid);
 
index 72de1f157d20235b4c2a103d5f098b5b9cf7dd1f..5da3b09b751867bc9c0bbb8c23b362fbab942a93 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/backing-dev.h>
 #include <linux/uuid.h>
 #include <linux/mm_types.h>
+#include <linux/dns_resolver.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/sock.h>
@@ -75,10 +76,13 @@ struct afs_addr_list {
        u32                     version;        /* Version */
        unsigned char           max_addrs;
        unsigned char           nr_addrs;
-       unsigned char           index;          /* Address currently in use */
+       unsigned char           preferred;      /* Preferred address */
        unsigned char           nr_ipv4;        /* Number of IPv4 addresses */
+       enum dns_record_source  source:8;
+       enum dns_lookup_status  status:8;
        unsigned long           probed;         /* Mask of servers that have been probed */
-       unsigned long           yfs;            /* Mask of servers that are YFS */
+       unsigned long           failed;         /* Mask of addrs that failed locally/ICMP */
+       unsigned long           responded;      /* Mask of addrs that responded */
        struct sockaddr_rxrpc   addrs[];
 #define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
 };
@@ -88,6 +92,7 @@ struct afs_addr_list {
  */
 struct afs_call {
        const struct afs_call_type *type;       /* type of call */
+       struct afs_addr_list    *alist;         /* Address is alist[addr_ix] */
        wait_queue_head_t       waitq;          /* processes awaiting completion */
        struct work_struct      async_work;     /* async I/O processor */
        struct work_struct      work;           /* actual work processor */
@@ -98,16 +103,22 @@ struct afs_call {
        struct afs_cb_interest  *cbi;           /* Callback interest for server used */
        void                    *request;       /* request data (first part) */
        struct address_space    *mapping;       /* Pages being written from */
+       struct iov_iter         iter;           /* Buffer iterator */
+       struct iov_iter         *_iter;         /* Iterator currently in use */
+       union { /* Convenience for ->iter */
+               struct kvec     kvec[1];
+               struct bio_vec  bvec[1];
+       };
        void                    *buffer;        /* reply receive buffer */
        void                    *reply[4];      /* Where to put the reply */
        pgoff_t                 first;          /* first page in mapping to deal with */
        pgoff_t                 last;           /* last page in mapping to deal with */
-       size_t                  offset;         /* offset into received data store */
        atomic_t                usage;
        enum afs_call_state     state;
        spinlock_t              state_lock;
        int                     error;          /* error code */
        u32                     abort_code;     /* Remote abort ID or 0 */
+       u32                     epoch;
        unsigned                request_size;   /* size of request data */
        unsigned                reply_max;      /* maximum size of reply */
        unsigned                first_offset;   /* offset into mapping[first] */
@@ -117,19 +128,28 @@ struct afs_call {
                unsigned        count2;         /* count used in unmarshalling */
        };
        unsigned char           unmarshall;     /* unmarshalling phase */
+       unsigned char           addr_ix;        /* Address in ->alist */
        bool                    incoming;       /* T if incoming call */
        bool                    send_pages;     /* T if data from mapping should be sent */
        bool                    need_attention; /* T if RxRPC poked us */
        bool                    async;          /* T if asynchronous */
        bool                    ret_reply0;     /* T if should return reply[0] on success */
        bool                    upgrade;        /* T to request service upgrade */
+       bool                    want_reply_time; /* T if want reply_time */
        u16                     service_id;     /* Actual service ID (after upgrade) */
        unsigned int            debug_id;       /* Trace ID */
        u32                     operation_ID;   /* operation ID for an incoming call */
        u32                     count;          /* count for use in unmarshalling */
-       __be32                  tmp;            /* place to extract temporary data */
+       union {                                 /* place to extract temporary data */
+               struct {
+                       __be32  tmp_u;
+                       __be32  tmp;
+               } __attribute__((packed));
+               __be64          tmp64;
+       };
        afs_dataversion_t       expected_version; /* Updated version expected from store */
        afs_dataversion_t       expected_version_2; /* 2nd updated version expected from store */
+       ktime_t                 reply_time;     /* Time of first reply packet */
 };
 
 struct afs_call_type {
@@ -146,6 +166,9 @@ struct afs_call_type {
 
        /* Work function */
        void (*work)(struct work_struct *work);
+
+       /* Call done function (gets called immediately on success or failure) */
+       void (*done)(struct afs_call *call);
 };
 
 /*
@@ -185,6 +208,7 @@ struct afs_read {
        refcount_t              usage;
        unsigned int            index;          /* Which page we're reading into */
        unsigned int            nr_pages;
+       unsigned int            offset;         /* offset into current page */
        void (*page_done)(struct afs_call *, struct afs_read *);
        struct page             **pages;
        struct page             *array[];
@@ -343,12 +367,69 @@ struct afs_cell {
        rwlock_t                proc_lock;
 
        /* VL server list. */
-       rwlock_t                vl_addrs_lock;  /* Lock on vl_addrs */
-       struct afs_addr_list    __rcu *vl_addrs; /* List of VL servers */
+       rwlock_t                vl_servers_lock; /* Lock on vl_servers */
+       struct afs_vlserver_list __rcu *vl_servers;
+
        u8                      name_len;       /* Length of name */
        char                    name[64 + 1];   /* Cell name, case-flattened and NUL-padded */
 };
 
+/*
+ * Volume Location server record.
+ */
+struct afs_vlserver {
+       struct rcu_head         rcu;
+       struct afs_addr_list    __rcu *addresses; /* List of addresses for this VL server */
+       unsigned long           flags;
+#define AFS_VLSERVER_FL_PROBED 0               /* The VL server has been probed */
+#define AFS_VLSERVER_FL_PROBING        1               /* VL server is being probed */
+#define AFS_VLSERVER_FL_IS_YFS 2               /* Server is YFS not AFS */
+       rwlock_t                lock;           /* Lock on addresses */
+       atomic_t                usage;
+
+       /* Probe state */
+       wait_queue_head_t       probe_wq;
+       atomic_t                probe_outstanding;
+       spinlock_t              probe_lock;
+       struct {
+               unsigned int    rtt;            /* RTT as ktime/64 */
+               u32             abort_code;
+               short           error;
+               bool            have_result;
+               bool            responded:1;
+               bool            is_yfs:1;
+               bool            not_yfs:1;
+               bool            local_failure:1;
+       } probe;
+
+       u16                     port;
+       u16                     name_len;       /* Length of name */
+       char                    name[];         /* Server name, case-flattened */
+};
+
+/*
+ * Weighted list of Volume Location servers.
+ */
+struct afs_vlserver_entry {
+       u16                     priority;       /* Preference (as SRV) */
+       u16                     weight;         /* Weight (as SRV) */
+       enum dns_record_source  source:8;
+       enum dns_lookup_status  status:8;
+       struct afs_vlserver     *server;
+};
+
+struct afs_vlserver_list {
+       struct rcu_head         rcu;
+       atomic_t                usage;
+       u8                      nr_servers;
+       u8                      index;          /* Server currently in use */
+       u8                      preferred;      /* Preferred server */
+       enum dns_record_source  source:8;
+       enum dns_lookup_status  status:8;
+       rwlock_t                lock;
+       struct afs_vlserver_entry servers[];
+};
+
 /*
  * Cached VLDB entry.
  *
@@ -403,8 +484,12 @@ struct afs_server {
 #define AFS_SERVER_FL_PROBING  6               /* Fileserver is being probed */
 #define AFS_SERVER_FL_NO_IBULK 7               /* Fileserver doesn't support FS.InlineBulkStatus */
 #define AFS_SERVER_FL_MAY_HAVE_CB 8            /* May have callbacks on this fileserver */
+#define AFS_SERVER_FL_IS_YFS   9               /* Server is YFS not AFS */
+#define AFS_SERVER_FL_NO_RM2   10              /* Fileserver doesn't support YFS.RemoveFile2 */
+#define AFS_SERVER_FL_HAVE_EPOCH 11            /* ->epoch is valid */
        atomic_t                usage;
        u32                     addr_version;   /* Address list version */
+       u32                     cm_epoch;       /* Server RxRPC epoch */
 
        /* file service access */
        rwlock_t                fs_lock;        /* access lock */
@@ -413,6 +498,26 @@ struct afs_server {
        struct hlist_head       cb_volumes;     /* List of volume interests on this server */
        unsigned                cb_s_break;     /* Break-everything counter. */
        rwlock_t                cb_break_lock;  /* Volume finding lock */
+
+       /* Probe state */
+       wait_queue_head_t       probe_wq;
+       atomic_t                probe_outstanding;
+       spinlock_t              probe_lock;
+       struct {
+               unsigned int    rtt;            /* RTT as ktime/64 */
+               u32             abort_code;
+               u32             cm_epoch;
+               short           error;
+               bool            have_result;
+               bool            responded:1;
+               bool            is_yfs:1;
+               bool            not_yfs:1;
+               bool            local_failure:1;
+               bool            no_epoch:1;
+               bool            cm_probed:1;
+               bool            said_rebooted:1;
+               bool            said_inconsistent:1;
+       } probe;
 };
 
 /*
@@ -447,8 +552,8 @@ struct afs_server_entry {
 
 struct afs_server_list {
        refcount_t              usage;
-       unsigned short          nr_servers;
-       unsigned short          index;          /* Server currently in use */
+       unsigned char           nr_servers;
+       unsigned char           preferred;      /* Preferred server */
        unsigned short          vnovol_mask;    /* Servers to be skipped due to VNOVOL */
        unsigned int            seq;            /* Set to ->servers_seq when installed */
        rwlock_t                lock;
@@ -550,6 +655,15 @@ struct afs_vnode {
        afs_callback_type_t     cb_type;        /* type of callback */
 };
 
+static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode)
+{
+#ifdef CONFIG_AFS_FSCACHE
+       return vnode->cache;
+#else
+       return NULL;
+#endif
+}
+
 /*
  * cached security record for one user's attempt to access a vnode
  */
@@ -586,13 +700,31 @@ struct afs_interface {
  */
 struct afs_addr_cursor {
        struct afs_addr_list    *alist;         /* Current address list (pins ref) */
-       struct sockaddr_rxrpc   *addr;
+       unsigned long           tried;          /* Tried addresses */
+       signed char             index;          /* Current address */
+       bool                    responded;      /* T if the current address responded */
+       unsigned short          nr_iterations;  /* Number of address iterations */
+       short                   error;
        u32                     abort_code;
-       unsigned short          start;          /* Starting point in alist->addrs[] */
-       unsigned short          index;          /* Wrapping offset from start to current addr */
+};
+
+/*
+ * Cursor for iterating over a set of volume location servers.
+ */
+struct afs_vl_cursor {
+       struct afs_addr_cursor  ac;
+       struct afs_cell         *cell;          /* The cell we're querying */
+       struct afs_vlserver_list *server_list;  /* Current server list (pins ref) */
+       struct afs_vlserver     *server;        /* Server on which this resides */
+       struct key              *key;           /* Key for the server */
+       unsigned long           untried;        /* Bitmask of untried servers */
+       short                   index;          /* Current server */
        short                   error;
-       bool                    begun;          /* T if we've begun iteration */
-       bool                    responded;      /* T if the current address responded */
+       unsigned short          flags;
+#define AFS_VL_CURSOR_STOP     0x0001          /* Set to cease iteration */
+#define AFS_VL_CURSOR_RETRY    0x0002          /* Set to do a retry */
+#define AFS_VL_CURSOR_RETRIED  0x0004          /* Set if started a retry */
+       unsigned short          nr_iterations;  /* Number of server iterations */
 };
 
 /*
@@ -604,10 +736,11 @@ struct afs_fs_cursor {
        struct afs_server_list  *server_list;   /* Current server list (pins ref) */
        struct afs_cb_interest  *cbi;           /* Server on which this resides (pins ref) */
        struct key              *key;           /* Key for the server */
+       unsigned long           untried;        /* Bitmask of untried servers */
        unsigned int            cb_break;       /* cb_break + cb_s_break before the call */
        unsigned int            cb_break_2;     /* cb_break + cb_s_break (2nd vnode) */
-       unsigned char           start;          /* Initial index in server list */
-       unsigned char           index;          /* Number of servers tried beyond start */
+       short                   index;          /* Current server */
+       short                   error;
        unsigned short          flags;
 #define AFS_FS_CURSOR_STOP     0x0001          /* Set to cease iteration */
 #define AFS_FS_CURSOR_VBUSY    0x0002          /* Set if seen VBUSY */
@@ -615,6 +748,7 @@ struct afs_fs_cursor {
 #define AFS_FS_CURSOR_VNOVOL   0x0008          /* Set if seen VNOVOL */
 #define AFS_FS_CURSOR_CUR_ONLY 0x0010          /* Set if current server only (file lock held) */
 #define AFS_FS_CURSOR_NO_VSLEEP        0x0020          /* Set to prevent sleep on VBUSY, VOFFLINE, ... */
+       unsigned short          nr_iterations;  /* Number of server iterations */
 };
 
 /*
@@ -640,12 +774,12 @@ extern struct afs_addr_list *afs_alloc_addrlist(unsigned int,
                                                unsigned short,
                                                unsigned short);
 extern void afs_put_addrlist(struct afs_addr_list *);
-extern struct afs_addr_list *afs_parse_text_addrs(const char *, size_t, char,
-                                                 unsigned short, unsigned short);
-extern struct afs_addr_list *afs_dns_query(struct afs_cell *, time64_t *);
+extern struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *,
+                                                     const char *, size_t, char,
+                                                     unsigned short, unsigned short);
+extern struct afs_vlserver_list *afs_dns_query(struct afs_cell *, time64_t *);
 extern bool afs_iterate_addresses(struct afs_addr_cursor *);
 extern int afs_end_cursor(struct afs_addr_cursor *);
-extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *);
 
 extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16);
 extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16);
@@ -668,6 +802,7 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def;
  * callback.c
  */
 extern void afs_init_callback_state(struct afs_server *);
+extern void __afs_break_callback(struct afs_vnode *);
 extern void afs_break_callback(struct afs_vnode *);
 extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*);
 
@@ -688,10 +823,13 @@ static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode)
        return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break;
 }
 
-static inline unsigned int afs_cb_break_sum(struct afs_vnode *vnode,
-                                           struct afs_cb_interest *cbi)
+static inline bool afs_cb_is_broken(unsigned int cb_break,
+                                   const struct afs_vnode *vnode,
+                                   const struct afs_cb_interest *cbi)
 {
-       return vnode->cb_break + cbi->server->cb_s_break + vnode->volume->cb_v_break;
+       return !cbi || cb_break != (vnode->cb_break +
+                                   cbi->server->cb_s_break +
+                                   vnode->volume->cb_v_break);
 }
 
 /*
@@ -781,7 +919,7 @@ extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *);
 extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *);
 extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t, u64,
                         struct afs_fid *, struct afs_file_status *, struct afs_callback *);
-extern int afs_fs_remove(struct afs_fs_cursor *, const char *, bool, u64);
+extern int afs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, u64);
 extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64);
 extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64,
                          struct afs_fid *, struct afs_file_status *);
@@ -797,7 +935,7 @@ extern int afs_fs_release_lock(struct afs_fs_cursor *);
 extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *,
                                        struct afs_addr_cursor *, struct key *);
 extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
-                                  struct afs_addr_cursor *, struct key *);
+                                  struct afs_addr_cursor *, struct key *, unsigned int, bool);
 extern int afs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *,
                                     struct afs_fid *, struct afs_file_status *,
                                     struct afs_callback *, unsigned int,
@@ -806,6 +944,13 @@ extern int afs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *,
                               struct afs_fid *, struct afs_file_status *,
                               struct afs_callback *, struct afs_volsync *);
 
+/*
+ * fs_probe.c
+ */
+extern void afs_fileserver_probe_result(struct afs_call *);
+extern int afs_probe_fileservers(struct afs_net *, struct key *, struct afs_server_list *);
+extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long);
+
 /*
  * inode.c
  */
@@ -922,7 +1067,6 @@ extern int __net_init afs_open_socket(struct afs_net *);
 extern void __net_exit afs_close_socket(struct afs_net *);
 extern void afs_charge_preallocation(struct work_struct *);
 extern void afs_put_call(struct afs_call *);
-extern int afs_queue_call_work(struct afs_call *);
 extern long afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t, bool);
 extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
                                            const struct afs_call_type *,
@@ -930,12 +1074,39 @@ extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
 extern void afs_flat_call_destructor(struct afs_call *);
 extern void afs_send_empty_reply(struct afs_call *);
 extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
-extern int afs_extract_data(struct afs_call *, void *, size_t, bool);
-extern int afs_protocol_error(struct afs_call *, int);
+extern int afs_extract_data(struct afs_call *, bool);
+extern int afs_protocol_error(struct afs_call *, int, enum afs_eproto_cause);
+
+static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size)
+{
+       call->kvec[0].iov_base = buf;
+       call->kvec[0].iov_len = size;
+       iov_iter_kvec(&call->iter, READ, call->kvec, 1, size);
+}
+
+static inline void afs_extract_to_tmp(struct afs_call *call)
+{
+       afs_extract_begin(call, &call->tmp, sizeof(call->tmp));
+}
+
+static inline void afs_extract_to_tmp64(struct afs_call *call)
+{
+       afs_extract_begin(call, &call->tmp64, sizeof(call->tmp64));
+}
+
+static inline void afs_extract_discard(struct afs_call *call, size_t size)
+{
+       iov_iter_discard(&call->iter, READ, size);
+}
+
+static inline void afs_extract_to_buf(struct afs_call *call, size_t size)
+{
+       afs_extract_begin(call, call->buffer, size);
+}
 
 static inline int afs_transfer_reply(struct afs_call *call)
 {
-       return afs_extract_data(call, call->buffer, call->reply_max, false);
+       return afs_extract_data(call, false);
 }
 
 static inline bool afs_check_call_state(struct afs_call *call,
@@ -1012,7 +1183,6 @@ extern void afs_put_server(struct afs_net *, struct afs_server *);
 extern void afs_manage_servers(struct work_struct *);
 extern void afs_servers_timer(struct timer_list *);
 extern void __net_exit afs_purge_servers(struct afs_net *);
-extern bool afs_probe_fileserver(struct afs_fs_cursor *);
 extern bool afs_check_server_record(struct afs_fs_cursor *, struct afs_server *);
 
 /*
@@ -1039,14 +1209,51 @@ extern void afs_fs_exit(void);
 /*
  * vlclient.c
  */
-extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *,
-                                                        struct afs_addr_cursor *,
-                                                        struct key *, const char *, int);
-extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *, struct afs_addr_cursor *,
-                                               struct key *, const uuid_t *);
-extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *);
-extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *, struct afs_addr_cursor *,
-                                                    struct key *, const uuid_t *);
+extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *,
+                                                        const char *, int);
+extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *);
+extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *,
+                                  struct afs_vlserver *, unsigned int, bool);
+extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *);
+
+/*
+ * vl_probe.c
+ */
+extern void afs_vlserver_probe_result(struct afs_call *);
+extern int afs_send_vl_probes(struct afs_net *, struct key *, struct afs_vlserver_list *);
+extern int afs_wait_for_vl_probes(struct afs_vlserver_list *, unsigned long);
+
+/*
+ * vl_rotate.c
+ */
+extern bool afs_begin_vlserver_operation(struct afs_vl_cursor *,
+                                        struct afs_cell *, struct key *);
+extern bool afs_select_vlserver(struct afs_vl_cursor *);
+extern bool afs_select_current_vlserver(struct afs_vl_cursor *);
+extern int afs_end_vlserver_operation(struct afs_vl_cursor *);
+
+/*
+ * vlserver_list.c
+ */
+static inline struct afs_vlserver *afs_get_vlserver(struct afs_vlserver *vlserver)
+{
+       atomic_inc(&vlserver->usage);
+       return vlserver;
+}
+
+static inline struct afs_vlserver_list *afs_get_vlserverlist(struct afs_vlserver_list *vllist)
+{
+       if (vllist)
+               atomic_inc(&vllist->usage);
+       return vllist;
+}
+
+extern struct afs_vlserver *afs_alloc_vlserver(const char *, size_t, unsigned short);
+extern void afs_put_vlserver(struct afs_net *, struct afs_vlserver *);
+extern struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int);
+extern void afs_put_vlserverlist(struct afs_net *, struct afs_vlserver_list *);
+extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *,
+                                                          const void *, size_t);
 
 /*
  * volume.c
@@ -1089,6 +1296,36 @@ extern int afs_launder_page(struct page *);
 extern const struct xattr_handler *afs_xattr_handlers[];
 extern ssize_t afs_listxattr(struct dentry *, char *, size_t);
 
+/*
+ * yfsclient.c
+ */
+extern int yfs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *, bool);
+extern int yfs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *);
+extern int yfs_fs_create_file(struct afs_fs_cursor *, const char *, umode_t, u64,
+                             struct afs_fid *, struct afs_file_status *, struct afs_callback *);
+extern int yfs_fs_make_dir(struct afs_fs_cursor *, const char *, umode_t, u64,
+                        struct afs_fid *, struct afs_file_status *, struct afs_callback *);
+extern int yfs_fs_remove_file2(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64);
+extern int yfs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, u64);
+extern int yfs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64);
+extern int yfs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64,
+                         struct afs_fid *, struct afs_file_status *);
+extern int yfs_fs_rename(struct afs_fs_cursor *, const char *,
+                        struct afs_vnode *, const char *, u64, u64);
+extern int yfs_fs_store_data(struct afs_fs_cursor *, struct address_space *,
+                            pgoff_t, pgoff_t, unsigned, unsigned);
+extern int yfs_fs_setattr(struct afs_fs_cursor *, struct iattr *);
+extern int yfs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *);
+extern int yfs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t);
+extern int yfs_fs_extend_lock(struct afs_fs_cursor *);
+extern int yfs_fs_release_lock(struct afs_fs_cursor *);
+extern int yfs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *,
+                              struct afs_fid *, struct afs_file_status *,
+                              struct afs_callback *, struct afs_volsync *);
+extern int yfs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *,
+                                    struct afs_fid *, struct afs_file_status *,
+                                    struct afs_callback *, unsigned int,
+                                    struct afs_volsync *);
 
 /*
  * Miscellaneous inline functions.
@@ -1120,6 +1357,17 @@ static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc,
        }
 }
 
+static inline int afs_io_error(struct afs_call *call, enum afs_io_error where)
+{
+       trace_afs_io_error(call->debug_id, -EIO, where);
+       return -EIO;
+}
+
+static inline int afs_bad(struct afs_vnode *vnode, enum afs_file_error where)
+{
+       trace_afs_file_error(vnode, -EIO, where);
+       return -EIO;
+}
 
 /*****************************************************************************/
 /*
index 99fd13500a97f9e77e2cbf603ae012bc460d1a2f..2e51c6994148f30f4ec8d858b1e318b1d58980c0 100644 (file)
@@ -130,9 +130,10 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
                        goto error_no_page;
                }
 
-               ret = -EIO;
-               if (PageError(page))
+               if (PageError(page)) {
+                       ret = afs_bad(AFS_FS_I(d_inode(mntpt)), afs_file_error_mntpt);
                        goto error;
+               }
 
                buf = kmap_atomic(page);
                memcpy(devname, buf, size);
index 9101f62707af2da3dbff5e33c6067d0cafbb9013..be2ee3bbd0a953349ccba4a30eecbd2366b840c1 100644 (file)
 #include <linux/uaccess.h>
 #include "internal.h"
 
+struct afs_vl_seq_net_private {
+       struct seq_net_private          seq;    /* Must be first */
+       struct afs_vlserver_list        *vllist;
+};
+
 static inline struct afs_net *afs_seq2net(struct seq_file *m)
 {
        return afs_net(seq_file_net(m));
@@ -32,16 +37,24 @@ static inline struct afs_net *afs_seq2net_single(struct seq_file *m)
  */
 static int afs_proc_cells_show(struct seq_file *m, void *v)
 {
-       struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link);
+       struct afs_vlserver_list *vllist;
+       struct afs_cell *cell;
 
        if (v == SEQ_START_TOKEN) {
                /* display header on line 1 */
-               seq_puts(m, "USE NAME\n");
+               seq_puts(m, "USE    TTL SV NAME\n");
                return 0;
        }
 
+       cell = list_entry(v, struct afs_cell, proc_link);
+       vllist = rcu_dereference(cell->vl_servers);
+
        /* display one cell per line on subsequent lines */
-       seq_printf(m, "%3u %s\n", atomic_read(&cell->usage), cell->name);
+       seq_printf(m, "%3u %6lld %2u %s\n",
+                  atomic_read(&cell->usage),
+                  cell->dns_expiry - ktime_get_real_seconds(),
+                  vllist ? vllist->nr_servers : 0,
+                  cell->name);
        return 0;
 }
 
@@ -208,7 +221,7 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
                return 0;
        }
 
-       seq_printf(m, "%3d %08x %s\n",
+       seq_printf(m, "%3d %08llx %s\n",
                   atomic_read(&vol->usage), vol->vid,
                   afs_vol_types[vol->type]);
 
@@ -247,61 +260,102 @@ static const struct seq_operations afs_proc_cell_volumes_ops = {
        .show   = afs_proc_cell_volumes_show,
 };
 
+static const char *const dns_record_sources[NR__dns_record_source + 1] = {
+       [DNS_RECORD_UNAVAILABLE]        = "unav",
+       [DNS_RECORD_FROM_CONFIG]        = "cfg",
+       [DNS_RECORD_FROM_DNS_A]         = "A",
+       [DNS_RECORD_FROM_DNS_AFSDB]     = "AFSDB",
+       [DNS_RECORD_FROM_DNS_SRV]       = "SRV",
+       [DNS_RECORD_FROM_NSS]           = "nss",
+       [NR__dns_record_source]         = "[weird]"
+};
+
+static const char *const dns_lookup_statuses[NR__dns_lookup_status + 1] = {
+       [DNS_LOOKUP_NOT_DONE]           = "no-lookup",
+       [DNS_LOOKUP_GOOD]               = "good",
+       [DNS_LOOKUP_GOOD_WITH_BAD]      = "good/bad",
+       [DNS_LOOKUP_BAD]                = "bad",
+       [DNS_LOOKUP_GOT_NOT_FOUND]      = "not-found",
+       [DNS_LOOKUP_GOT_LOCAL_FAILURE]  = "local-failure",
+       [DNS_LOOKUP_GOT_TEMP_FAILURE]   = "temp-failure",
+       [DNS_LOOKUP_GOT_NS_FAILURE]     = "ns-failure",
+       [NR__dns_lookup_status]         = "[weird]"
+};
+
 /*
  * Display the list of Volume Location servers we're using for a cell.
  */
 static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
 {
-       struct sockaddr_rxrpc *addr = v;
+       const struct afs_vl_seq_net_private *priv = m->private;
+       const struct afs_vlserver_list *vllist = priv->vllist;
+       const struct afs_vlserver_entry *entry;
+       const struct afs_vlserver *vlserver;
+       const struct afs_addr_list *alist;
+       int i;
 
-       /* display header on line 1 */
-       if (v == (void *)1) {
-               seq_puts(m, "ADDRESS\n");
+       if (v == SEQ_START_TOKEN) {
+               seq_printf(m, "# source %s, status %s\n",
+                          dns_record_sources[vllist->source],
+                          dns_lookup_statuses[vllist->status]);
                return 0;
        }
 
-       /* display one cell per line on subsequent lines */
-       seq_printf(m, "%pISp\n", &addr->transport);
+       entry = v;
+       vlserver = entry->server;
+       alist = rcu_dereference(vlserver->addresses);
+
+       seq_printf(m, "%s [p=%hu w=%hu s=%s,%s]:\n",
+                  vlserver->name, entry->priority, entry->weight,
+                  dns_record_sources[alist ? alist->source : entry->source],
+                  dns_lookup_statuses[alist ? alist->status : entry->status]);
+       if (alist) {
+               for (i = 0; i < alist->nr_addrs; i++)
+                       seq_printf(m, " %c %pISpc\n",
+                                  alist->preferred == i ? '>' : '-',
+                                  &alist->addrs[i].transport);
+       }
        return 0;
 }
 
 static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
        __acquires(rcu)
 {
-       struct afs_addr_list *alist;
+       struct afs_vl_seq_net_private *priv = m->private;
+       struct afs_vlserver_list *vllist;
        struct afs_cell *cell = PDE_DATA(file_inode(m->file));
        loff_t pos = *_pos;
 
        rcu_read_lock();
 
-       alist = rcu_dereference(cell->vl_addrs);
+       vllist = rcu_dereference(cell->vl_servers);
+       priv->vllist = vllist;
 
-       /* allow for the header line */
-       if (!pos)
-               return (void *) 1;
-       pos--;
+       if (pos < 0)
+               *_pos = pos = 0;
+       if (pos == 0)
+               return SEQ_START_TOKEN;
 
-       if (!alist || pos >= alist->nr_addrs)
+       if (!vllist || pos - 1 >= vllist->nr_servers)
                return NULL;
 
-       return alist->addrs + pos;
+       return &vllist->servers[pos - 1];
 }
 
 static void *afs_proc_cell_vlservers_next(struct seq_file *m, void *v,
                                          loff_t *_pos)
 {
-       struct afs_addr_list *alist;
-       struct afs_cell *cell = PDE_DATA(file_inode(m->file));
+       struct afs_vl_seq_net_private *priv = m->private;
+       struct afs_vlserver_list *vllist = priv->vllist;
        loff_t pos;
 
-       alist = rcu_dereference(cell->vl_addrs);
-
        pos = *_pos;
-       (*_pos)++;
-       if (!alist || pos >= alist->nr_addrs)
+       pos++;
+       *_pos = pos;
+       if (!vllist || pos - 1 >= vllist->nr_servers)
                return NULL;
 
-       return alist->addrs + pos;
+       return &vllist->servers[pos - 1];
 }
 
 static void afs_proc_cell_vlservers_stop(struct seq_file *m, void *v)
@@ -337,11 +391,11 @@ static int afs_proc_servers_show(struct seq_file *m, void *v)
                   &server->uuid,
                   atomic_read(&server->usage),
                   &alist->addrs[0].transport,
-                  alist->index == 0 ? "*" : "");
+                  alist->preferred == 0 ? "*" : "");
        for (i = 1; i < alist->nr_addrs; i++)
                seq_printf(m, "                                         %pISpc%s\n",
                           &alist->addrs[i].transport,
-                          alist->index == i ? "*" : "");
+                          alist->preferred == i ? "*" : "");
        return 0;
 }
 
@@ -562,7 +616,7 @@ int afs_proc_cell_setup(struct afs_cell *cell)
 
        if (!proc_create_net_data("vlservers", 0444, dir,
                                  &afs_proc_cell_vlservers_ops,
-                                 sizeof(struct seq_net_private),
+                                 sizeof(struct afs_vl_seq_net_private),
                                  cell) ||
            !proc_create_net_data("volumes", 0444, dir,
                                  &afs_proc_cell_volumes_ops,
diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h
new file mode 100644 (file)
index 0000000..07bc10f
--- /dev/null
@@ -0,0 +1,163 @@
+/* YFS protocol bits
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define YFS_FS_SERVICE 2500
+#define YFS_CM_SERVICE 2501
+
+#define YFSCBMAX 1024
+
+enum YFS_CM_Operations {
+       YFSCBProbe              = 206,  /* probe client */
+       YFSCBGetLock            = 207,  /* get contents of CM lock table */
+       YFSCBXStatsVersion      = 209,  /* get version of extended statistics */
+       YFSCBGetXStats          = 210,  /* get contents of extended statistics data */
+       YFSCBInitCallBackState3 = 213,  /* initialise callback state, version 3 */
+       YFSCBProbeUuid          = 214,  /* check the client hasn't rebooted */
+       YFSCBGetServerPrefs     = 215,
+       YFSCBGetCellServDV      = 216,
+       YFSCBGetLocalCell       = 217,
+       YFSCBGetCacheConfig     = 218,
+       YFSCBGetCellByNum       = 65537,
+       YFSCBTellMeAboutYourself = 65538, /* get client capabilities */
+       YFSCBCallBack           = 64204,
+};
+
+enum YFS_FS_Operations {
+       YFSFETCHACL             = 64131, /* YFS Fetch file ACL */
+       YFSFETCHSTATUS          = 64132, /* YFS Fetch file status */
+       YFSSTOREACL             = 64134, /* YFS Store file ACL */
+       YFSSTORESTATUS          = 64135, /* YFS Store file status */
+       YFSREMOVEFILE           = 64136, /* YFS Remove a file */
+       YFSCREATEFILE           = 64137, /* YFS Create a file */
+       YFSRENAME               = 64138, /* YFS Rename or move a file or directory */
+       YFSSYMLINK              = 64139, /* YFS Create a symbolic link */
+       YFSLINK                 = 64140, /* YFS Create a hard link */
+       YFSMAKEDIR              = 64141, /* YFS Create a directory */
+       YFSREMOVEDIR            = 64142, /* YFS Remove a directory */
+       YFSGETVOLUMESTATUS      = 64149, /* YFS Get volume status information */
+       YFSSETVOLUMESTATUS      = 64150, /* YFS Set volume status information */
+       YFSSETLOCK              = 64156, /* YFS Request a file lock */
+       YFSEXTENDLOCK           = 64157, /* YFS Extend a file lock */
+       YFSRELEASELOCK          = 64158, /* YFS Release a file lock */
+       YFSLOOKUP               = 64161, /* YFS lookup file in directory */
+       YFSFLUSHCPS             = 64165,
+       YFSFETCHOPAQUEACL       = 64168,
+       YFSWHOAMI               = 64170,
+       YFSREMOVEACL            = 64171,
+       YFSREMOVEFILE2          = 64173,
+       YFSSTOREOPAQUEACL2      = 64174,
+       YFSINLINEBULKSTATUS     = 64536, /* YFS Fetch multiple file statuses with errors */
+       YFSFETCHDATA64          = 64537, /* YFS Fetch file data */
+       YFSSTOREDATA64          = 64538, /* YFS Store file data */
+       YFSUPDATESYMLINK        = 64540,
+};
+
+struct yfs_xdr_u64 {
+       __be32                  msw;
+       __be32                  lsw;
+} __packed;
+
+static inline u64 xdr_to_u64(const struct yfs_xdr_u64 x)
+{
+       return ((u64)ntohl(x.msw) << 32) | ntohl(x.lsw);
+}
+
+static inline struct yfs_xdr_u64 u64_to_xdr(const u64 x)
+{
+       return (struct yfs_xdr_u64){ .msw = htonl(x >> 32), .lsw = htonl(x) };
+}
+
+struct yfs_xdr_vnode {
+       struct yfs_xdr_u64      lo;
+       __be32                  hi;
+       __be32                  unique;
+} __packed;
+
+struct yfs_xdr_YFSFid {
+       struct yfs_xdr_u64      volume;
+       struct yfs_xdr_vnode    vnode;
+} __packed;
+
+
+struct yfs_xdr_YFSFetchStatus {
+       __be32                  type;
+       __be32                  nlink;
+       struct yfs_xdr_u64      size;
+       struct yfs_xdr_u64      data_version;
+       struct yfs_xdr_u64      author;
+       struct yfs_xdr_u64      owner;
+       struct yfs_xdr_u64      group;
+       __be32                  mode;
+       __be32                  caller_access;
+       __be32                  anon_access;
+       struct yfs_xdr_vnode    parent;
+       __be32                  data_access_protocol;
+       struct yfs_xdr_u64      mtime_client;
+       struct yfs_xdr_u64      mtime_server;
+       __be32                  lock_count;
+       __be32                  abort_code;
+} __packed;
+
+struct yfs_xdr_YFSCallBack {
+       __be32                  version;
+       struct yfs_xdr_u64      expiration_time;
+       __be32                  type;
+} __packed;
+
+struct yfs_xdr_YFSStoreStatus {
+       __be32                  mask;
+       __be32                  mode;
+       struct yfs_xdr_u64      mtime_client;
+       struct yfs_xdr_u64      owner;
+       struct yfs_xdr_u64      group;
+} __packed;
+
+struct yfs_xdr_RPCFlags {
+       __be32                  rpc_flags;
+} __packed;
+
+struct yfs_xdr_YFSVolSync {
+       struct yfs_xdr_u64      vol_creation_date;
+       struct yfs_xdr_u64      vol_update_date;
+       struct yfs_xdr_u64      max_quota;
+       struct yfs_xdr_u64      blocks_in_use;
+       struct yfs_xdr_u64      blocks_avail;
+} __packed;
+
+enum yfs_volume_type {
+       yfs_volume_type_ro = 0,
+       yfs_volume_type_rw = 1,
+};
+
+#define yfs_FVSOnline          0x1
+#define yfs_FVSInservice       0x2
+#define yfs_FVSBlessed         0x4
+#define yfs_FVSNeedsSalvage    0x8
+
+struct yfs_xdr_YFSFetchVolumeStatus {
+       struct yfs_xdr_u64      vid;
+       struct yfs_xdr_u64      parent_id;
+       __be32                  flags;
+       __be32                  type;
+       struct yfs_xdr_u64      max_quota;
+       struct yfs_xdr_u64      blocks_in_use;
+       struct yfs_xdr_u64      part_blocks_avail;
+       struct yfs_xdr_u64      part_max_blocks;
+       struct yfs_xdr_u64      vol_copy_date;
+       struct yfs_xdr_u64      vol_backup_date;
+} __packed;
+
+struct yfs_xdr_YFSStoreVolumeStatus {
+       __be32                  mask;
+       struct yfs_xdr_u64      min_quota;
+       struct yfs_xdr_u64      max_quota;
+       struct yfs_xdr_u64      file_quota;
+} __packed;
index 1faef56b12bd3f9591b2acc29ce89c6689e224e6..00504254c1c24b6186ec676edcfd1fdcdfffbc63 100644 (file)
 #include "internal.h"
 #include "afs_fs.h"
 
-/*
- * Initialise a filesystem server cursor for iterating over FS servers.
- */
-static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
-{
-       memset(fc, 0, sizeof(*fc));
-}
-
 /*
  * Begin an operation on the fileserver.
  *
@@ -35,13 +27,14 @@ static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode
 bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
                               struct key *key)
 {
-       afs_init_fs_cursor(fc, vnode);
+       memset(fc, 0, sizeof(*fc));
        fc->vnode = vnode;
        fc->key = key;
        fc->ac.error = SHRT_MAX;
+       fc->error = -EDESTADDRREQ;
 
        if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
-               fc->ac.error = -EINTR;
+               fc->error = -EINTR;
                fc->flags |= AFS_FS_CURSOR_STOP;
                return false;
        }
@@ -65,12 +58,15 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
        fc->server_list = afs_get_serverlist(vnode->volume->servers);
        read_unlock(&vnode->volume->servers_lock);
 
+       fc->untried = (1UL << fc->server_list->nr_servers) - 1;
+       fc->index = READ_ONCE(fc->server_list->preferred);
+
        cbi = vnode->cb_interest;
        if (cbi) {
                /* See if the vnode's preferred record is still available */
                for (i = 0; i < fc->server_list->nr_servers; i++) {
                        if (fc->server_list->servers[i].cb_interest == cbi) {
-                               fc->start = i;
+                               fc->index = i;
                                goto found_interest;
                        }
                }
@@ -80,7 +76,7 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
                 * and have to return an error.
                 */
                if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
-                       fc->ac.error = -ESTALE;
+                       fc->error = -ESTALE;
                        return false;
                }
 
@@ -94,12 +90,9 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
 
                afs_put_cb_interest(afs_v2net(vnode), cbi);
                cbi = NULL;
-       } else {
-               fc->start = READ_ONCE(fc->server_list->index);
        }
 
 found_interest:
-       fc->index = fc->start;
        return true;
 }
 
@@ -117,7 +110,7 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code)
        default:                m = "busy";             break;
        }
 
-       pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m);
+       pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m);
 }
 
 /*
@@ -127,7 +120,7 @@ static bool afs_sleep_and_retry(struct afs_fs_cursor *fc)
 {
        msleep_interruptible(1000);
        if (signal_pending(current)) {
-               fc->ac.error = -ERESTARTSYS;
+               fc->error = -ERESTARTSYS;
                return false;
        }
 
@@ -143,27 +136,32 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
        struct afs_addr_list *alist;
        struct afs_server *server;
        struct afs_vnode *vnode = fc->vnode;
+       u32 rtt, abort_code;
+       int error = fc->ac.error, i;
 
-       _enter("%u/%u,%u/%u,%d,%d",
-              fc->index, fc->start,
-              fc->ac.index, fc->ac.start,
-              fc->ac.error, fc->ac.abort_code);
+       _enter("%lx[%d],%lx[%d],%d,%d",
+              fc->untried, fc->index,
+              fc->ac.tried, fc->ac.index,
+              error, fc->ac.abort_code);
 
        if (fc->flags & AFS_FS_CURSOR_STOP) {
                _leave(" = f [stopped]");
                return false;
        }
 
+       fc->nr_iterations++;
+
        /* Evaluate the result of the previous operation, if there was one. */
-       switch (fc->ac.error) {
+       switch (error) {
        case SHRT_MAX:
                goto start;
 
        case 0:
        default:
                /* Success or local failure.  Stop. */
+               fc->error = error;
                fc->flags |= AFS_FS_CURSOR_STOP;
-               _leave(" = f [okay/local %d]", fc->ac.error);
+               _leave(" = f [okay/local %d]", error);
                return false;
 
        case -ECONNABORTED:
@@ -178,7 +176,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                         * - May indicate that the fileserver couldn't attach to the vol.
                         */
                        if (fc->flags & AFS_FS_CURSOR_VNOVOL) {
-                               fc->ac.error = -EREMOTEIO;
+                               fc->error = -EREMOTEIO;
                                goto next_server;
                        }
 
@@ -187,12 +185,12 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                        write_unlock(&vnode->volume->servers_lock);
 
                        set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
-                       fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
-                       if (fc->ac.error < 0)
-                               goto failed;
+                       error = afs_check_volume_status(vnode->volume, fc->key);
+                       if (error < 0)
+                               goto failed_set_error;
 
                        if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) {
-                               fc->ac.error = -ENOMEDIUM;
+                               fc->error = -ENOMEDIUM;
                                goto failed;
                        }
 
@@ -200,7 +198,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                         * it's the fileserver having trouble.
                         */
                        if (vnode->volume->servers == fc->server_list) {
-                               fc->ac.error = -EREMOTEIO;
+                               fc->error = -EREMOTEIO;
                                goto next_server;
                        }
 
@@ -215,7 +213,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                case VONLINE:
                case VDISKFULL:
                case VOVERQUOTA:
-                       fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
+                       fc->error = afs_abort_to_error(fc->ac.abort_code);
                        goto next_server;
 
                case VOFFLINE:
@@ -224,11 +222,11 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                                clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
                        }
                        if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
-                               fc->ac.error = -EADV;
+                               fc->error = -EADV;
                                goto failed;
                        }
                        if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
-                               fc->ac.error = -ESTALE;
+                               fc->error = -ESTALE;
                                goto failed;
                        }
                        goto busy;
@@ -240,7 +238,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                         * have a file lock we need to maintain.
                         */
                        if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
-                               fc->ac.error = -EBUSY;
+                               fc->error = -EBUSY;
                                goto failed;
                        }
                        if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) {
@@ -269,16 +267,16 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                         * honour, just in case someone sets up a loop.
                         */
                        if (fc->flags & AFS_FS_CURSOR_VMOVED) {
-                               fc->ac.error = -EREMOTEIO;
+                               fc->error = -EREMOTEIO;
                                goto failed;
                        }
                        fc->flags |= AFS_FS_CURSOR_VMOVED;
 
                        set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags);
                        set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
-                       fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
-                       if (fc->ac.error < 0)
-                               goto failed;
+                       error = afs_check_volume_status(vnode->volume, fc->key);
+                       if (error < 0)
+                               goto failed_set_error;
 
                        /* If the server list didn't change, then the VLDB is
                         * out of sync with the fileservers.  This is hopefully
@@ -290,7 +288,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                         * TODO: Retry a few times with sleeps.
                         */
                        if (vnode->volume->servers == fc->server_list) {
-                               fc->ac.error = -ENOMEDIUM;
+                               fc->error = -ENOMEDIUM;
                                goto failed;
                        }
 
@@ -299,20 +297,25 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                default:
                        clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
                        clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
-                       fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
+                       fc->error = afs_abort_to_error(fc->ac.abort_code);
                        goto failed;
                }
 
+       case -ETIMEDOUT:
+       case -ETIME:
+               if (fc->error != -EDESTADDRREQ)
+                       goto iterate_address;
+               /* Fall through */
        case -ENETUNREACH:
        case -EHOSTUNREACH:
        case -ECONNREFUSED:
-       case -ETIMEDOUT:
-       case -ETIME:
                _debug("no conn");
+               fc->error = error;
                goto iterate_address;
 
        case -ECONNRESET:
                _debug("call reset");
+               fc->error = error;
                goto failed;
        }
 
@@ -328,15 +331,57 @@ start:
        /* See if we need to do an update of the volume record.  Note that the
         * volume may have moved or even have been deleted.
         */
-       fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
-       if (fc->ac.error < 0)
-               goto failed;
+       error = afs_check_volume_status(vnode->volume, fc->key);
+       if (error < 0)
+               goto failed_set_error;
 
        if (!afs_start_fs_iteration(fc, vnode))
                goto failed;
 
-use_server:
-       _debug("use");
+       _debug("__ VOL %llx __", vnode->volume->vid);
+       error = afs_probe_fileservers(afs_v2net(vnode), fc->key, fc->server_list);
+       if (error < 0)
+               goto failed_set_error;
+
+pick_server:
+       _debug("pick [%lx]", fc->untried);
+
+       error = afs_wait_for_fs_probes(fc->server_list, fc->untried);
+       if (error < 0)
+               goto failed_set_error;
+
+       /* Pick the untried server with the lowest RTT.  If we have outstanding
+        * callbacks, we stick with the server we're already using if we can.
+        */
+       if (fc->cbi) {
+               _debug("cbi %u", fc->index);
+               if (test_bit(fc->index, &fc->untried))
+                       goto selected_server;
+               afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
+               fc->cbi = NULL;
+               _debug("nocbi");
+       }
+
+       fc->index = -1;
+       rtt = U32_MAX;
+       for (i = 0; i < fc->server_list->nr_servers; i++) {
+               struct afs_server *s = fc->server_list->servers[i].server;
+
+               if (!test_bit(i, &fc->untried) || !s->probe.responded)
+                       continue;
+               if (s->probe.rtt < rtt) {
+                       fc->index = i;
+                       rtt = s->probe.rtt;
+               }
+       }
+
+       if (fc->index == -1)
+               goto no_more_servers;
+
+selected_server:
+       _debug("use %d", fc->index);
+       __clear_bit(fc->index, &fc->untried);
+
        /* We're starting on a different fileserver from the list.  We need to
         * check it, create a callback intercept, find its address list and
         * probe its capabilities before we use it.
@@ -354,10 +399,10 @@ use_server:
         * break request before we've finished decoding the reply and
         * installing the vnode.
         */
-       fc->ac.error = afs_register_server_cb_interest(vnode, fc->server_list,
-                                                      fc->index);
-       if (fc->ac.error < 0)
-               goto failed;
+       error = afs_register_server_cb_interest(vnode, fc->server_list,
+                                               fc->index);
+       if (error < 0)
+               goto failed_set_error;
 
        fc->cbi = afs_get_cb_interest(vnode->cb_interest);
 
@@ -369,66 +414,88 @@ use_server:
 
        memset(&fc->ac, 0, sizeof(fc->ac));
 
-       /* Probe the current fileserver if we haven't done so yet. */
-       if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
-               fc->ac.alist = afs_get_addrlist(alist);
-
-               if (!afs_probe_fileserver(fc)) {
-                       switch (fc->ac.error) {
-                       case -ENOMEM:
-                       case -ERESTARTSYS:
-                       case -EINTR:
-                               goto failed;
-                       default:
-                               goto next_server;
-                       }
-               }
-       }
-
        if (!fc->ac.alist)
                fc->ac.alist = alist;
        else
                afs_put_addrlist(alist);
 
-       fc->ac.start = READ_ONCE(alist->index);
-       fc->ac.index = fc->ac.start;
+       fc->ac.index = -1;
 
 iterate_address:
        ASSERT(fc->ac.alist);
-       _debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs);
        /* Iterate over the current server's address list to try and find an
         * address on which it will respond to us.
         */
        if (!afs_iterate_addresses(&fc->ac))
                goto next_server;
 
+       _debug("address [%u] %u/%u", fc->index, fc->ac.index, fc->ac.alist->nr_addrs);
+
        _leave(" = t");
        return true;
 
 next_server:
        _debug("next");
        afs_end_cursor(&fc->ac);
-       afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
-       fc->cbi = NULL;
-       fc->index++;
-       if (fc->index >= fc->server_list->nr_servers)
-               fc->index = 0;
-       if (fc->index != fc->start)
-               goto use_server;
+       goto pick_server;
 
+no_more_servers:
        /* That's all the servers poked to no good effect.  Try again if some
         * of them were busy.
         */
        if (fc->flags & AFS_FS_CURSOR_VBUSY)
                goto restart_from_beginning;
 
-       fc->ac.error = -EDESTADDRREQ;
-       goto failed;
+       abort_code = 0;
+       error = -EDESTADDRREQ;
+       for (i = 0; i < fc->server_list->nr_servers; i++) {
+               struct afs_server *s = fc->server_list->servers[i].server;
+               int probe_error = READ_ONCE(s->probe.error);
+
+               switch (probe_error) {
+               case 0:
+                       continue;
+               default:
+                       if (error == -ETIMEDOUT ||
+                           error == -ETIME)
+                               continue;
+               case -ETIMEDOUT:
+               case -ETIME:
+                       if (error == -ENOMEM ||
+                           error == -ENONET)
+                               continue;
+               case -ENOMEM:
+               case -ENONET:
+                       if (error == -ENETUNREACH)
+                               continue;
+               case -ENETUNREACH:
+                       if (error == -EHOSTUNREACH)
+                               continue;
+               case -EHOSTUNREACH:
+                       if (error == -ECONNREFUSED)
+                               continue;
+               case -ECONNREFUSED:
+                       if (error == -ECONNRESET)
+                               continue;
+               case -ECONNRESET: /* Responded, but call expired. */
+                       if (error == -ECONNABORTED)
+                               continue;
+               case -ECONNABORTED:
+                       abort_code = s->probe.abort_code;
+                       error = probe_error;
+                       continue;
+               }
+       }
+
+       if (error == -ECONNABORTED)
+               error = afs_abort_to_error(abort_code);
 
+failed_set_error:
+       fc->error = error;
 failed:
        fc->flags |= AFS_FS_CURSOR_STOP;
        afs_end_cursor(&fc->ac);
-       _leave(" = f [failed %d]", fc->ac.error);
+       _leave(" = f [failed %d]", fc->error);
        return false;
 }
 
@@ -442,13 +509,14 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
        struct afs_vnode *vnode = fc->vnode;
        struct afs_cb_interest *cbi = vnode->cb_interest;
        struct afs_addr_list *alist;
+       int error = fc->ac.error;
 
        _enter("");
 
-       switch (fc->ac.error) {
+       switch (error) {
        case SHRT_MAX:
                if (!cbi) {
-                       fc->ac.error = -ESTALE;
+                       fc->error = -ESTALE;
                        fc->flags |= AFS_FS_CURSOR_STOP;
                        return false;
                }
@@ -461,25 +529,26 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
                afs_get_addrlist(alist);
                read_unlock(&cbi->server->fs_lock);
                if (!alist) {
-                       fc->ac.error = -ESTALE;
+                       fc->error = -ESTALE;
                        fc->flags |= AFS_FS_CURSOR_STOP;
                        return false;
                }
 
                memset(&fc->ac, 0, sizeof(fc->ac));
                fc->ac.alist = alist;
-               fc->ac.start = READ_ONCE(alist->index);
-               fc->ac.index = fc->ac.start;
+               fc->ac.index = -1;
                goto iterate_address;
 
        case 0:
        default:
                /* Success or local failure.  Stop. */
+               fc->error = error;
                fc->flags |= AFS_FS_CURSOR_STOP;
-               _leave(" = f [okay/local %d]", fc->ac.error);
+               _leave(" = f [okay/local %d]", error);
                return false;
 
        case -ECONNABORTED:
+               fc->error = afs_abort_to_error(fc->ac.abort_code);
                fc->flags |= AFS_FS_CURSOR_STOP;
                _leave(" = f [abort]");
                return false;
@@ -490,6 +559,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
        case -ETIMEDOUT:
        case -ETIME:
                _debug("no conn");
+               fc->error = error;
                goto iterate_address;
        }
 
@@ -506,13 +576,66 @@ iterate_address:
        return false;
 }
 
+/*
+ * Dump cursor state in the case of the error being EDESTADDRREQ.
+ */
+static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc)
+{
+       static int count;
+       int i;
+
+       if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
+               return;
+       count++;
+
+       rcu_read_lock();
+
+       pr_notice("EDESTADDR occurred\n");
+       pr_notice("FC: cbb=%x cbb2=%x fl=%hx err=%hd\n",
+                 fc->cb_break, fc->cb_break_2, fc->flags, fc->error);
+       pr_notice("FC: ut=%lx ix=%d ni=%u\n",
+                 fc->untried, fc->index, fc->nr_iterations);
+
+       if (fc->server_list) {
+               const struct afs_server_list *sl = fc->server_list;
+               pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n",
+                         sl->nr_servers, sl->preferred, sl->vnovol_mask);
+               for (i = 0; i < sl->nr_servers; i++) {
+                       const struct afs_server *s = sl->servers[i].server;
+                       pr_notice("FC: server fl=%lx av=%u %pU\n",
+                                 s->flags, s->addr_version, &s->uuid);
+                       if (s->addresses) {
+                               const struct afs_addr_list *a =
+                                       rcu_dereference(s->addresses);
+                               pr_notice("FC:  - av=%u nr=%u/%u/%u pr=%u\n",
+                                         a->version,
+                                         a->nr_ipv4, a->nr_addrs, a->max_addrs,
+                                         a->preferred);
+                               pr_notice("FC:  - pr=%lx R=%lx F=%lx\n",
+                                         a->probed, a->responded, a->failed);
+                               if (a == fc->ac.alist)
+                                       pr_notice("FC:  - current\n");
+                       }
+               }
+       }
+
+       pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
+                 fc->ac.tried, fc->ac.index, fc->ac.abort_code, fc->ac.error,
+                 fc->ac.responded, fc->ac.nr_iterations);
+       rcu_read_unlock();
+}
+
 /*
  * Tidy up a filesystem cursor and unlock the vnode.
  */
 int afs_end_vnode_operation(struct afs_fs_cursor *fc)
 {
        struct afs_net *net = afs_v2net(fc->vnode);
-       int ret;
+
+       if (fc->error == -EDESTADDRREQ ||
+           fc->error == -ENETUNREACH ||
+           fc->error == -EHOSTUNREACH)
+               afs_dump_edestaddrreq(fc);
 
        mutex_unlock(&fc->vnode->io_lock);
 
@@ -520,9 +643,8 @@ int afs_end_vnode_operation(struct afs_fs_cursor *fc)
        afs_put_cb_interest(net, fc->cbi);
        afs_put_serverlist(net, fc->server_list);
 
-       ret = fc->ac.error;
-       if (ret == -ECONNABORTED)
-               afs_abort_to_error(fc->ac.abort_code);
+       if (fc->error == -ECONNABORTED)
+               fc->error = afs_abort_to_error(fc->ac.abort_code);
 
-       return fc->ac.error;
+       return fc->error;
 }
index 77a83790a31f38c9e25ffeaa1c190eb8958e7fa9..59970886690f1b6a3767b72ec33c3c2fe81fd61f 100644 (file)
@@ -16,6 +16,7 @@
 #include <net/af_rxrpc.h>
 #include "internal.h"
 #include "afs_cm.h"
+#include "protocol_yfs.h"
 
 struct workqueue_struct *afs_async_calls;
 
@@ -75,6 +76,18 @@ int afs_open_socket(struct afs_net *net)
        if (ret < 0)
                goto error_2;
 
+       srx.srx_service = YFS_CM_SERVICE;
+       ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
+       if (ret < 0)
+               goto error_2;
+
+       /* Ideally, we'd turn on service upgrade here, but we can't because
+        * OpenAFS is buggy and leaks the userStatus field from packet to
+        * packet and between FS packets and CB packets - so if we try to do an
+        * upgrade on an FS packet, OpenAFS will leak that into the CB packet
+        * it sends back to us.
+        */
+
        rxrpc_kernel_new_call_notification(socket, afs_rx_new_call,
                                           afs_rx_discard_new_call);
 
@@ -143,6 +156,7 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
        INIT_WORK(&call->async_work, afs_process_async_call);
        init_waitqueue_head(&call->waitq);
        spin_lock_init(&call->state_lock);
+       call->_iter = &call->iter;
 
        o = atomic_inc_return(&net->nr_outstanding_calls);
        trace_afs_call(call, afs_call_trace_alloc, 1, o,
@@ -176,6 +190,7 @@ void afs_put_call(struct afs_call *call)
 
                afs_put_server(call->net, call->cm_server);
                afs_put_cb_interest(call->net, call->cbi);
+               afs_put_addrlist(call->alist);
                kfree(call->request);
 
                trace_afs_call(call, afs_call_trace_free, 0, o,
@@ -189,21 +204,22 @@ void afs_put_call(struct afs_call *call)
 }
 
 /*
- * Queue the call for actual work.  Returns 0 unconditionally for convenience.
+ * Queue the call for actual work.
  */
-int afs_queue_call_work(struct afs_call *call)
+static void afs_queue_call_work(struct afs_call *call)
 {
-       int u = atomic_inc_return(&call->usage);
+       if (call->type->work) {
+               int u = atomic_inc_return(&call->usage);
 
-       trace_afs_call(call, afs_call_trace_work, u,
-                      atomic_read(&call->net->nr_outstanding_calls),
-                      __builtin_return_address(0));
+               trace_afs_call(call, afs_call_trace_work, u,
+                              atomic_read(&call->net->nr_outstanding_calls),
+                              __builtin_return_address(0));
 
-       INIT_WORK(&call->work, call->type->work);
+               INIT_WORK(&call->work, call->type->work);
 
-       if (!queue_work(afs_wq, &call->work))
-               afs_put_call(call);
-       return 0;
+               if (!queue_work(afs_wq, &call->work))
+                       afs_put_call(call);
+       }
 }
 
 /*
@@ -233,6 +249,7 @@ struct afs_call *afs_alloc_flat_call(struct afs_net *net,
                        goto nomem_free;
        }
 
+       afs_extract_to_buf(call, call->reply_max);
        call->operation_ID = type->op;
        init_waitqueue_head(&call->waitq);
        return call;
@@ -286,7 +303,7 @@ static void afs_load_bvec(struct afs_call *call, struct msghdr *msg,
                offset = 0;
        }
 
-       iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, bv, nr, bytes);
+       iov_iter_bvec(&msg->msg_iter, WRITE, bv, nr, bytes);
 }
 
 /*
@@ -342,7 +359,7 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
 long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
                   gfp_t gfp, bool async)
 {
-       struct sockaddr_rxrpc *srx = ac->addr;
+       struct sockaddr_rxrpc *srx = &ac->alist->addrs[ac->index];
        struct rxrpc_call *rxcall;
        struct msghdr msg;
        struct kvec iov[1];
@@ -359,6 +376,8 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
               atomic_read(&call->net->nr_outstanding_calls));
 
        call->async = async;
+       call->addr_ix = ac->index;
+       call->alist = afs_get_addrlist(ac->alist);
 
        /* Work out the length we're going to transmit.  This is awkward for
         * calls such as FS.StoreData where there's an extra injection of data
@@ -390,6 +409,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
                                         call->debug_id);
        if (IS_ERR(rxcall)) {
                ret = PTR_ERR(rxcall);
+               call->error = ret;
                goto error_kill_call;
        }
 
@@ -401,8 +421,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 
        msg.msg_name            = NULL;
        msg.msg_namelen         = 0;
-       iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1,
-                     call->request_size);
+       iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, call->request_size);
        msg.msg_control         = NULL;
        msg.msg_controllen      = 0;
        msg.msg_flags           = MSG_WAITALL | (call->send_pages ? MSG_MORE : 0);
@@ -432,7 +451,7 @@ error_do_abort:
                rxrpc_kernel_abort_call(call->net->socket, rxcall,
                                        RX_USER_ABORT, ret, "KSD");
        } else {
-               iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, NULL, 0, 0);
+               iov_iter_kvec(&msg.msg_iter, READ, NULL, 0, 0);
                rxrpc_kernel_recv_data(call->net->socket, rxcall,
                                       &msg.msg_iter, false,
                                       &call->abort_code, &call->service_id);
@@ -442,6 +461,8 @@ error_do_abort:
        call->error = ret;
        trace_afs_call_done(call);
 error_kill_call:
+       if (call->type->done)
+               call->type->done(call);
        afs_put_call(call);
        ac->error = ret;
        _leave(" = %d", ret);
@@ -466,14 +487,12 @@ static void afs_deliver_to_call(struct afs_call *call)
               state == AFS_CALL_SV_AWAIT_ACK
               ) {
                if (state == AFS_CALL_SV_AWAIT_ACK) {
-                       struct iov_iter iter;
-
-                       iov_iter_kvec(&iter, READ | ITER_KVEC, NULL, 0, 0);
+                       iov_iter_kvec(&call->iter, READ, NULL, 0, 0);
                        ret = rxrpc_kernel_recv_data(call->net->socket,
-                                                    call->rxcall, &iter, false,
-                                                    &remote_abort,
+                                                    call->rxcall, &call->iter,
+                                                    false, &remote_abort,
                                                     &call->service_id);
-                       trace_afs_recv_data(call, 0, 0, false, ret);
+                       trace_afs_receive_data(call, &call->iter, false, ret);
 
                        if (ret == -EINPROGRESS || ret == -EAGAIN)
                                return;
@@ -485,10 +504,17 @@ static void afs_deliver_to_call(struct afs_call *call)
                        return;
                }
 
+               if (call->want_reply_time &&
+                   rxrpc_kernel_get_reply_time(call->net->socket,
+                                               call->rxcall,
+                                               &call->reply_time))
+                       call->want_reply_time = false;
+
                ret = call->type->deliver(call);
                state = READ_ONCE(call->state);
                switch (ret) {
                case 0:
+                       afs_queue_call_work(call);
                        if (state == AFS_CALL_CL_PROC_REPLY) {
                                if (call->cbi)
                                        set_bit(AFS_SERVER_FL_MAY_HAVE_CB,
@@ -500,7 +526,6 @@ static void afs_deliver_to_call(struct afs_call *call)
                case -EINPROGRESS:
                case -EAGAIN:
                        goto out;
-               case -EIO:
                case -ECONNABORTED:
                        ASSERTCMP(state, ==, AFS_CALL_COMPLETE);
                        goto done;
@@ -509,6 +534,10 @@ static void afs_deliver_to_call(struct afs_call *call)
                        rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
                                                abort_code, ret, "KIV");
                        goto local_abort;
+               case -EIO:
+                       pr_err("kAFS: Call %u in bad state %u\n",
+                              call->debug_id, state);
+                       /* Fall through */
                case -ENODATA:
                case -EBADMSG:
                case -EMSGSIZE:
@@ -517,12 +546,14 @@ static void afs_deliver_to_call(struct afs_call *call)
                        if (state != AFS_CALL_CL_AWAIT_REPLY)
                                abort_code = RXGEN_SS_UNMARSHAL;
                        rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
-                                               abort_code, -EBADMSG, "KUM");
+                                               abort_code, ret, "KUM");
                        goto local_abort;
                }
        }
 
 done:
+       if (call->type->done)
+               call->type->done(call);
        if (state == AFS_CALL_COMPLETE && call->incoming)
                afs_put_call(call);
 out:
@@ -728,6 +759,7 @@ void afs_charge_preallocation(struct work_struct *work)
                        call->async = true;
                        call->state = AFS_CALL_SV_AWAIT_OP_ID;
                        init_waitqueue_head(&call->waitq);
+                       afs_extract_to_tmp(call);
                }
 
                if (rxrpc_kernel_charge_accept(net->socket,
@@ -773,18 +805,15 @@ static int afs_deliver_cm_op_id(struct afs_call *call)
 {
        int ret;
 
-       _enter("{%zu}", call->offset);
-
-       ASSERTCMP(call->offset, <, 4);
+       _enter("{%zu}", iov_iter_count(call->_iter));
 
        /* the operation ID forms the first four bytes of the request data */
-       ret = afs_extract_data(call, &call->tmp, 4, true);
+       ret = afs_extract_data(call, true);
        if (ret < 0)
                return ret;
 
        call->operation_ID = ntohl(call->tmp);
        afs_set_call_state(call, AFS_CALL_SV_AWAIT_OP_ID, AFS_CALL_SV_AWAIT_REQUEST);
-       call->offset = 0;
 
        /* ask the cache manager to route the call (it'll change the call type
         * if successful) */
@@ -825,7 +854,7 @@ void afs_send_empty_reply(struct afs_call *call)
 
        msg.msg_name            = NULL;
        msg.msg_namelen         = 0;
-       iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
+       iov_iter_kvec(&msg.msg_iter, WRITE, NULL, 0, 0);
        msg.msg_control         = NULL;
        msg.msg_controllen      = 0;
        msg.msg_flags           = 0;
@@ -864,7 +893,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
        iov[0].iov_len          = len;
        msg.msg_name            = NULL;
        msg.msg_namelen         = 0;
-       iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1, len);
+       iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
        msg.msg_control         = NULL;
        msg.msg_controllen      = 0;
        msg.msg_flags           = 0;
@@ -888,30 +917,19 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 /*
  * Extract a piece of data from the received data socket buffers.
  */
-int afs_extract_data(struct afs_call *call, void *buf, size_t count,
-                    bool want_more)
+int afs_extract_data(struct afs_call *call, bool want_more)
 {
        struct afs_net *net = call->net;
-       struct iov_iter iter;
-       struct kvec iov;
+       struct iov_iter *iter = call->_iter;
        enum afs_call_state state;
        u32 remote_abort = 0;
        int ret;
 
-       _enter("{%s,%zu},,%zu,%d",
-              call->type->name, call->offset, count, want_more);
-
-       ASSERTCMP(call->offset, <=, count);
-
-       iov.iov_base = buf + call->offset;
-       iov.iov_len = count - call->offset;
-       iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, count - call->offset);
+       _enter("{%s,%zu},%d", call->type->name, iov_iter_count(iter), want_more);
 
-       ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, &iter,
+       ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter,
                                     want_more, &remote_abort,
                                     &call->service_id);
-       call->offset += (count - call->offset) - iov_iter_count(&iter);
-       trace_afs_recv_data(call, count, call->offset, want_more, ret);
        if (ret == 0 || ret == -EAGAIN)
                return ret;
 
@@ -926,7 +944,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
                        break;
                case AFS_CALL_COMPLETE:
                        kdebug("prem complete %d", call->error);
-                       return -EIO;
+                       return afs_io_error(call, afs_io_error_extract);
                default:
                        break;
                }
@@ -940,8 +958,9 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 /*
  * Log protocol error production.
  */
-noinline int afs_protocol_error(struct afs_call *call, int error)
+noinline int afs_protocol_error(struct afs_call *call, int error,
+                               enum afs_eproto_cause cause)
 {
-       trace_afs_protocol_error(call, error, __builtin_return_address(0));
+       trace_afs_protocol_error(call, error, cause);
        return error;
 }
index 81dfedb7879ff9bf56ab4fcca26ef6c90d835de2..5f58a9a17e694a09dbe0d0b70d9dbc0cc9833aa4 100644 (file)
@@ -126,7 +126,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
        bool changed = false;
        int i, j;
 
-       _enter("{%x:%u},%x,%x",
+       _enter("{%llx:%llu},%x,%x",
               vnode->fid.vid, vnode->fid.vnode, key_serial(key), caller_access);
 
        rcu_read_lock();
@@ -147,7 +147,8 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
                                        break;
                                }
 
-                               if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest)) {
+                               if (afs_cb_is_broken(cb_break, vnode,
+                                                    vnode->cb_interest)) {
                                        changed = true;
                                        break;
                                }
@@ -177,7 +178,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
                }
        }
 
-       if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest))
+       if (afs_cb_is_broken(cb_break, vnode, vnode->cb_interest))
                goto someone_else_changed_it;
 
        /* We need a ref on any permits list we want to copy as we'll have to
@@ -256,7 +257,7 @@ found:
 
        spin_lock(&vnode->lock);
        zap = rcu_access_pointer(vnode->permit_cache);
-       if (cb_break == afs_cb_break_sum(vnode, vnode->cb_interest) &&
+       if (!afs_cb_is_broken(cb_break, vnode, vnode->cb_interest) &&
            zap == permits)
                rcu_assign_pointer(vnode->permit_cache, replacement);
        else
@@ -289,7 +290,7 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key,
        bool valid = false;
        int i, ret;
 
-       _enter("{%x:%u},%x",
+       _enter("{%llx:%llu},%x",
               vnode->fid.vid, vnode->fid.vnode, key_serial(key));
 
        /* check the permits to see if we've got one yet */
@@ -349,7 +350,7 @@ int afs_permission(struct inode *inode, int mask)
        if (mask & MAY_NOT_BLOCK)
                return -ECHILD;
 
-       _enter("{{%x:%u},%lx},%x,",
+       _enter("{{%llx:%llu},%lx},%x,",
               vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask);
 
        key = afs_request_key(vnode->volume->cell);
index 1d329e6981d515c06bb5b711a1e3880226c2cce8..642afa2e9783c4f95284980dd8054610fa4d49cf 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include "afs_fs.h"
 #include "internal.h"
+#include "protocol_yfs.h"
 
 static unsigned afs_server_gc_delay = 10;      /* Server record timeout in seconds */
 static unsigned afs_server_update_delay = 30;  /* Time till VLDB recheck in secs */
@@ -230,6 +231,8 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
        rwlock_init(&server->fs_lock);
        INIT_HLIST_HEAD(&server->cb_volumes);
        rwlock_init(&server->cb_break_lock);
+       init_waitqueue_head(&server->probe_wq);
+       spin_lock_init(&server->probe_lock);
 
        afs_inc_servers_outstanding(net);
        _leave(" = %p", server);
@@ -246,41 +249,23 @@ enomem:
 static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
                                                 struct key *key, const uuid_t *uuid)
 {
-       struct afs_addr_cursor ac;
-       struct afs_addr_list *alist;
+       struct afs_vl_cursor vc;
+       struct afs_addr_list *alist = NULL;
        int ret;
 
-       ret = afs_set_vl_cursor(&ac, cell);
-       if (ret < 0)
-               return ERR_PTR(ret);
-
-       while (afs_iterate_addresses(&ac)) {
-               if (test_bit(ac.index, &ac.alist->yfs))
-                       alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid);
-               else
-                       alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);
-               switch (ac.error) {
-               case 0:
-                       afs_end_cursor(&ac);
-                       return alist;
-               case -ECONNABORTED:
-                       ac.error = afs_abort_to_error(ac.abort_code);
-                       goto error;
-               case -ENOMEM:
-               case -ENONET:
-                       goto error;
-               case -ENETUNREACH:
-               case -EHOSTUNREACH:
-               case -ECONNREFUSED:
-                       break;
-               default:
-                       ac.error = -EIO;
-                       goto error;
+       ret = -ERESTARTSYS;
+       if (afs_begin_vlserver_operation(&vc, cell, key)) {
+               while (afs_select_vlserver(&vc)) {
+                       if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
+                               alist = afs_yfsvl_get_endpoints(&vc, uuid);
+                       else
+                               alist = afs_vl_get_addrs_u(&vc, uuid);
                }
+
+               ret = afs_end_vlserver_operation(&vc);
        }
 
-error:
-       return ERR_PTR(afs_end_cursor(&ac));
+       return ret < 0 ? ERR_PTR(ret) : alist;
 }
 
 /*
@@ -382,9 +367,7 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
        struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
        struct afs_addr_cursor ac = {
                .alist  = alist,
-               .start  = alist->index,
-               .index  = 0,
-               .addr   = &alist->addrs[alist->index],
+               .index  = alist->preferred,
                .error  = 0,
        };
        _enter("%p", server);
@@ -392,6 +375,9 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
        if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
                afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
 
+       wait_var_event(&server->probe_outstanding,
+                      atomic_read(&server->probe_outstanding) == 0);
+
        call_rcu(&server->rcu, afs_server_rcu);
        afs_dec_servers_outstanding(net);
 }
@@ -524,99 +510,6 @@ void afs_purge_servers(struct afs_net *net)
        _leave("");
 }
 
-/*
- * Probe a fileserver to find its capabilities.
- *
- * TODO: Try service upgrade.
- */
-static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
-{
-       _enter("");
-
-       fc->ac.addr = NULL;
-       fc->ac.start = READ_ONCE(fc->ac.alist->index);
-       fc->ac.index = fc->ac.start;
-       fc->ac.error = 0;
-       fc->ac.begun = false;
-
-       while (afs_iterate_addresses(&fc->ac)) {
-               afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
-                                       &fc->ac, fc->key);
-               switch (fc->ac.error) {
-               case 0:
-                       afs_end_cursor(&fc->ac);
-                       set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
-                       return true;
-               case -ECONNABORTED:
-                       fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
-                       goto error;
-               case -ENOMEM:
-               case -ENONET:
-                       goto error;
-               case -ENETUNREACH:
-               case -EHOSTUNREACH:
-               case -ECONNREFUSED:
-               case -ETIMEDOUT:
-               case -ETIME:
-                       break;
-               default:
-                       fc->ac.error = -EIO;
-                       goto error;
-               }
-       }
-
-error:
-       afs_end_cursor(&fc->ac);
-       return false;
-}
-
-/*
- * If we haven't already, try probing the fileserver to get its capabilities.
- * We try not to instigate parallel probes, but it's possible that the parallel
- * probes will fail due to authentication failure when ours would succeed.
- *
- * TODO: Try sending an anonymous probe if an authenticated probe fails.
- */
-bool afs_probe_fileserver(struct afs_fs_cursor *fc)
-{
-       bool success;
-       int ret, retries = 0;
-
-       _enter("");
-
-retry:
-       if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
-               _leave(" = t");
-               return true;
-       }
-
-       if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
-               success = afs_do_probe_fileserver(fc);
-               clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
-               wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
-               _leave(" = t");
-               return success;
-       }
-
-       _debug("wait");
-       ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
-                         TASK_INTERRUPTIBLE);
-       if (ret == -ERESTARTSYS) {
-               fc->ac.error = ret;
-               _leave(" = f [%d]", ret);
-               return false;
-       }
-
-       retries++;
-       if (retries == 4) {
-               fc->ac.error = -ESTALE;
-               _leave(" = f [stale]");
-               return false;
-       }
-       _debug("retry");
-       goto retry;
-}
-
 /*
  * Get an update for a server's address list.
  */
index 8a5760aa583213a608d686b60f0782ecbed648e1..95d0761cdb34ef3c0a214693651292ba08b2def1 100644 (file)
@@ -118,11 +118,11 @@ bool afs_annotate_server_list(struct afs_server_list *new,
        return false;
 
 changed:
-       /* Maintain the same current server as before if possible. */
-       cur = old->servers[old->index].server;
+       /* Maintain the same preferred server as before if possible. */
+       cur = old->servers[old->preferred].server;
        for (j = 0; j < new->nr_servers; j++) {
                if (new->servers[j].server == cur) {
-                       new->index = j;
+                       new->preferred = j;
                        break;
                }
        }
index 4d3e274207fb7aa05aa320b957a03911984cf67d..dcd07fe99871b9b38541293b164ac1cf8328a005 100644 (file)
@@ -406,10 +406,11 @@ static int afs_fill_super(struct super_block *sb,
                inode = afs_iget_pseudo_dir(sb, true);
                sb->s_flags     |= SB_RDONLY;
        } else {
-               sprintf(sb->s_id, "%u", as->volume->vid);
+               sprintf(sb->s_id, "%llu", as->volume->vid);
                afs_activate_volume(as->volume);
                fid.vid         = as->volume->vid;
                fid.vnode       = 1;
+               fid.vnode_hi    = 0;
                fid.unique      = 1;
                inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL);
        }
@@ -663,7 +664,7 @@ static void afs_destroy_inode(struct inode *inode)
 {
        struct afs_vnode *vnode = AFS_FS_I(inode);
 
-       _enter("%p{%x:%u}", inode, vnode->fid.vid, vnode->fid.vnode);
+       _enter("%p{%llx:%llu}", inode, vnode->fid.vid, vnode->fid.vnode);
 
        _debug("DESTROY INODE %p", inode);
 
diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c
new file mode 100644 (file)
index 0000000..b4f1a84
--- /dev/null
@@ -0,0 +1,340 @@
+/* AFS vlserver list management.
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len,
+                                       unsigned short port)
+{
+       struct afs_vlserver *vlserver;
+
+       vlserver = kzalloc(struct_size(vlserver, name, name_len + 1),
+                          GFP_KERNEL);
+       if (vlserver) {
+               atomic_set(&vlserver->usage, 1);
+               rwlock_init(&vlserver->lock);
+               init_waitqueue_head(&vlserver->probe_wq);
+               spin_lock_init(&vlserver->probe_lock);
+               vlserver->name_len = name_len;
+               vlserver->port = port;
+               memcpy(vlserver->name, name, name_len);
+       }
+       return vlserver;
+}
+
+static void afs_vlserver_rcu(struct rcu_head *rcu)
+{
+       struct afs_vlserver *vlserver = container_of(rcu, struct afs_vlserver, rcu);
+
+       afs_put_addrlist(rcu_access_pointer(vlserver->addresses));
+       kfree_rcu(vlserver, rcu);
+}
+
+void afs_put_vlserver(struct afs_net *net, struct afs_vlserver *vlserver)
+{
+       if (vlserver) {
+               unsigned int u = atomic_dec_return(&vlserver->usage);
+               //_debug("VL PUT %p{%u}", vlserver, u);
+
+               if (u == 0)
+                       call_rcu(&vlserver->rcu, afs_vlserver_rcu);
+       }
+}
+
+struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int nr_servers)
+{
+       struct afs_vlserver_list *vllist;
+
+       vllist = kzalloc(struct_size(vllist, servers, nr_servers), GFP_KERNEL);
+       if (vllist) {
+               atomic_set(&vllist->usage, 1);
+               rwlock_init(&vllist->lock);
+       }
+
+       return vllist;
+}
+
+void afs_put_vlserverlist(struct afs_net *net, struct afs_vlserver_list *vllist)
+{
+       if (vllist) {
+               unsigned int u = atomic_dec_return(&vllist->usage);
+
+               //_debug("VLLS PUT %p{%u}", vllist, u);
+               if (u == 0) {
+                       int i;
+
+                       for (i = 0; i < vllist->nr_servers; i++) {
+                               afs_put_vlserver(net, vllist->servers[i].server);
+                       }
+                       kfree_rcu(vllist, rcu);
+               }
+       }
+}
+
+static u16 afs_extract_le16(const u8 **_b)
+{
+       u16 val;
+
+       val  = (u16)*(*_b)++ << 0;
+       val |= (u16)*(*_b)++ << 8;
+       return val;
+}
+
+/*
+ * Build a VL server address list from a DNS queried server list.
+ */
+static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end,
+                                                 u8 nr_addrs, u16 port)
+{
+       struct afs_addr_list *alist;
+       const u8 *b = *_b;
+       int ret = -EINVAL;
+
+       alist = afs_alloc_addrlist(nr_addrs, VL_SERVICE, port);
+       if (!alist)
+               return ERR_PTR(-ENOMEM);
+       if (nr_addrs == 0)
+               return alist;
+
+       for (; nr_addrs > 0 && end - b >= nr_addrs; nr_addrs--) {
+               struct dns_server_list_v1_address hdr;
+               __be32 x[4];
+
+               hdr.address_type = *b++;
+
+               switch (hdr.address_type) {
+               case DNS_ADDRESS_IS_IPV4:
+                       if (end - b < 4) {
+                               _leave(" = -EINVAL [short inet]");
+                               goto error;
+                       }
+                       memcpy(x, b, 4);
+                       afs_merge_fs_addr4(alist, x[0], port);
+                       b += 4;
+                       break;
+
+               case DNS_ADDRESS_IS_IPV6:
+                       if (end - b < 16) {
+                               _leave(" = -EINVAL [short inet6]");
+                               goto error;
+                       }
+                       memcpy(x, b, 16);
+                       afs_merge_fs_addr6(alist, x, port);
+                       b += 16;
+                       break;
+
+               default:
+                       _leave(" = -EADDRNOTAVAIL [unknown af %u]",
+                              hdr.address_type);
+                       ret = -EADDRNOTAVAIL;
+                       goto error;
+               }
+       }
+
+       /* Start with IPv6 if available. */
+       if (alist->nr_ipv4 < alist->nr_addrs)
+               alist->preferred = alist->nr_ipv4;
+
+       *_b = b;
+       return alist;
+
+error:
+       *_b = b;
+       afs_put_addrlist(alist);
+       return ERR_PTR(ret);
+}
+
+/*
+ * Build a VL server list from a DNS queried server list.
+ */
+struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
+                                                   const void *buffer,
+                                                   size_t buffer_size)
+{
+       const struct dns_server_list_v1_header *hdr = buffer;
+       struct dns_server_list_v1_server bs;
+       struct afs_vlserver_list *vllist, *previous;
+       struct afs_addr_list *addrs;
+       struct afs_vlserver *server;
+       const u8 *b = buffer, *end = buffer + buffer_size;
+       int ret = -ENOMEM, nr_servers, i, j;
+
+       _enter("");
+
+       /* Check that it's a server list, v1 */
+       if (end - b < sizeof(*hdr) ||
+           hdr->hdr.content != DNS_PAYLOAD_IS_SERVER_LIST ||
+           hdr->hdr.version != 1) {
+               pr_notice("kAFS: Got DNS record [%u,%u] len %zu\n",
+                         hdr->hdr.content, hdr->hdr.version, end - b);
+               ret = -EDESTADDRREQ;
+               goto dump;
+       }
+
+       nr_servers = hdr->nr_servers;
+
+       vllist = afs_alloc_vlserver_list(nr_servers);
+       if (!vllist)
+               return ERR_PTR(-ENOMEM);
+
+       vllist->source = (hdr->source < NR__dns_record_source) ?
+               hdr->source : NR__dns_record_source;
+       vllist->status = (hdr->status < NR__dns_lookup_status) ?
+               hdr->status : NR__dns_lookup_status;
+
+       read_lock(&cell->vl_servers_lock);
+       previous = afs_get_vlserverlist(
+               rcu_dereference_protected(cell->vl_servers,
+                                         lockdep_is_held(&cell->vl_servers_lock)));
+       read_unlock(&cell->vl_servers_lock);
+
+       b += sizeof(*hdr);
+       while (end - b >= sizeof(bs)) {
+               bs.name_len     = afs_extract_le16(&b);
+               bs.priority     = afs_extract_le16(&b);
+               bs.weight       = afs_extract_le16(&b);
+               bs.port         = afs_extract_le16(&b);
+               bs.source       = *b++;
+               bs.status       = *b++;
+               bs.protocol     = *b++;
+               bs.nr_addrs     = *b++;
+
+               _debug("extract %u %u %u %u %u %u %*.*s",
+                      bs.name_len, bs.priority, bs.weight,
+                      bs.port, bs.protocol, bs.nr_addrs,
+                      bs.name_len, bs.name_len, b);
+
+               if (end - b < bs.name_len)
+                       break;
+
+               ret = -EPROTONOSUPPORT;
+               if (bs.protocol == DNS_SERVER_PROTOCOL_UNSPECIFIED) {
+                       bs.protocol = DNS_SERVER_PROTOCOL_UDP;
+               } else if (bs.protocol != DNS_SERVER_PROTOCOL_UDP) {
+                       _leave(" = [proto %u]", bs.protocol);
+                       goto error;
+               }
+
+               if (bs.port == 0)
+                       bs.port = AFS_VL_PORT;
+               if (bs.source > NR__dns_record_source)
+                       bs.source = NR__dns_record_source;
+               if (bs.status > NR__dns_lookup_status)
+                       bs.status = NR__dns_lookup_status;
+
+               server = NULL;
+               if (previous) {
+                       /* See if we can update an old server record */
+                       for (i = 0; i < previous->nr_servers; i++) {
+                               struct afs_vlserver *p = previous->servers[i].server;
+
+                               if (p->name_len == bs.name_len &&
+                                   p->port == bs.port &&
+                                   strncasecmp(b, p->name, bs.name_len) == 0) {
+                                       server = afs_get_vlserver(p);
+                                       break;
+                               }
+                       }
+               }
+
+               if (!server) {
+                       ret = -ENOMEM;
+                       server = afs_alloc_vlserver(b, bs.name_len, bs.port);
+                       if (!server)
+                               goto error;
+               }
+
+               b += bs.name_len;
+
+               /* Extract the addresses - note that we can't skip this as we
+                * have to advance the payload pointer.
+                */
+               addrs = afs_extract_vl_addrs(&b, end, bs.nr_addrs, bs.port);
+               if (IS_ERR(addrs)) {
+                       ret = PTR_ERR(addrs);
+                       goto error_2;
+               }
+
+               if (vllist->nr_servers >= nr_servers) {
+                       _debug("skip %u >= %u", vllist->nr_servers, nr_servers);
+                       afs_put_addrlist(addrs);
+                       afs_put_vlserver(cell->net, server);
+                       continue;
+               }
+
+               addrs->source = bs.source;
+               addrs->status = bs.status;
+
+               if (addrs->nr_addrs == 0) {
+                       afs_put_addrlist(addrs);
+                       if (!rcu_access_pointer(server->addresses)) {
+                               afs_put_vlserver(cell->net, server);
+                               continue;
+                       }
+               } else {
+                       struct afs_addr_list *old = addrs;
+
+                       write_lock(&server->lock);
+                       rcu_swap_protected(server->addresses, old,
+                                          lockdep_is_held(&server->lock));
+                       write_unlock(&server->lock);
+                       afs_put_addrlist(old);
+               }
+
+
+               /* TODO: Might want to check for duplicates */
+
+               /* Insertion-sort by priority and weight */
+               for (j = 0; j < vllist->nr_servers; j++) {
+                       if (bs.priority < vllist->servers[j].priority)
+                               break; /* Lower preferable */
+                       if (bs.priority == vllist->servers[j].priority &&
+                           bs.weight > vllist->servers[j].weight)
+                               break; /* Higher preferable */
+               }
+
+               if (j < vllist->nr_servers) {
+                       memmove(vllist->servers + j + 1,
+                               vllist->servers + j,
+                               (vllist->nr_servers - j) * sizeof(struct afs_vlserver_entry));
+               }
+
+               clear_bit(AFS_VLSERVER_FL_PROBED, &server->flags);
+
+               vllist->servers[j].priority = bs.priority;
+               vllist->servers[j].weight = bs.weight;
+               vllist->servers[j].server = server;
+               vllist->nr_servers++;
+       }
+
+       if (b != end) {
+               _debug("parse error %zd", b - end);
+               goto error;
+       }
+
+       afs_put_vlserverlist(cell->net, previous);
+       _leave(" = ok [%u]", vllist->nr_servers);
+       return vllist;
+
+error_2:
+       afs_put_vlserver(cell->net, server);
+error:
+       afs_put_vlserverlist(cell->net, vllist);
+       afs_put_vlserverlist(cell->net, previous);
+dump:
+       if (ret != -ENOMEM) {
+               printk(KERN_DEBUG "DNS: at %zu\n", (const void *)b - buffer);
+               print_hex_dump_bytes("DNS: ", DUMP_PREFIX_NONE, buffer, buffer_size);
+       }
+       return ERR_PTR(ret);
+}
diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c
new file mode 100644 (file)
index 0000000..c0f616b
--- /dev/null
@@ -0,0 +1,273 @@
+/* AFS vlserver probing
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include "afs_fs.h"
+#include "internal.h"
+#include "protocol_yfs.h"
+
+static bool afs_vl_probe_done(struct afs_vlserver *server)
+{
+       if (!atomic_dec_and_test(&server->probe_outstanding))
+               return false;
+
+       wake_up_var(&server->probe_outstanding);
+       clear_bit_unlock(AFS_VLSERVER_FL_PROBING, &server->flags);
+       wake_up_bit(&server->flags, AFS_VLSERVER_FL_PROBING);
+       return true;
+}
+
+/*
+ * Process the result of probing a vlserver.  This is called after successful
+ * or failed delivery of an VL.GetCapabilities operation.
+ */
+void afs_vlserver_probe_result(struct afs_call *call)
+{
+       struct afs_addr_list *alist = call->alist;
+       struct afs_vlserver *server = call->reply[0];
+       unsigned int server_index = (long)call->reply[1];
+       unsigned int index = call->addr_ix;
+       unsigned int rtt = UINT_MAX;
+       bool have_result = false;
+       u64 _rtt;
+       int ret = call->error;
+
+       _enter("%s,%u,%u,%d,%d", server->name, server_index, index, ret, call->abort_code);
+
+       spin_lock(&server->probe_lock);
+
+       switch (ret) {
+       case 0:
+               server->probe.error = 0;
+               goto responded;
+       case -ECONNABORTED:
+               if (!server->probe.responded) {
+                       server->probe.abort_code = call->abort_code;
+                       server->probe.error = ret;
+               }
+               goto responded;
+       case -ENOMEM:
+       case -ENONET:
+               server->probe.local_failure = true;
+               afs_io_error(call, afs_io_error_vl_probe_fail);
+               goto out;
+       case -ECONNRESET: /* Responded, but call expired. */
+       case -ENETUNREACH:
+       case -EHOSTUNREACH:
+       case -ECONNREFUSED:
+       case -ETIMEDOUT:
+       case -ETIME:
+       default:
+               clear_bit(index, &alist->responded);
+               set_bit(index, &alist->failed);
+               if (!server->probe.responded &&
+                   (server->probe.error == 0 ||
+                    server->probe.error == -ETIMEDOUT ||
+                    server->probe.error == -ETIME))
+                       server->probe.error = ret;
+               afs_io_error(call, afs_io_error_vl_probe_fail);
+               goto out;
+       }
+
+responded:
+       set_bit(index, &alist->responded);
+       clear_bit(index, &alist->failed);
+
+       if (call->service_id == YFS_VL_SERVICE) {
+               server->probe.is_yfs = true;
+               set_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
+               alist->addrs[index].srx_service = call->service_id;
+       } else {
+               server->probe.not_yfs = true;
+               if (!server->probe.is_yfs) {
+                       clear_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
+                       alist->addrs[index].srx_service = call->service_id;
+               }
+       }
+
+       /* Get the RTT and scale it to fit into a 32-bit value that represents
+        * over a minute of time so that we can access it with one instruction
+        * on a 32-bit system.
+        */
+       _rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
+       _rtt /= 64;
+       rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt;
+       if (rtt < server->probe.rtt) {
+               server->probe.rtt = rtt;
+               alist->preferred = index;
+               have_result = true;
+       }
+
+       smp_wmb(); /* Set rtt before responded. */
+       server->probe.responded = true;
+       set_bit(AFS_VLSERVER_FL_PROBED, &server->flags);
+out:
+       spin_unlock(&server->probe_lock);
+
+       _debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
+              server_index, index, &alist->addrs[index].transport,
+              (unsigned int)rtt, ret);
+
+       have_result |= afs_vl_probe_done(server);
+       if (have_result) {
+               server->probe.have_result = true;
+               wake_up_var(&server->probe.have_result);
+               wake_up_all(&server->probe_wq);
+       }
+}
+
+/*
+ * Probe all of a vlserver's addresses to find out the best route and to
+ * query its capabilities.
+ */
+static int afs_do_probe_vlserver(struct afs_net *net,
+                                struct afs_vlserver *server,
+                                struct key *key,
+                                unsigned int server_index)
+{
+       struct afs_addr_cursor ac = {
+               .index = 0,
+       };
+       int ret;
+
+       _enter("%s", server->name);
+
+       read_lock(&server->lock);
+       ac.alist = rcu_dereference_protected(server->addresses,
+                                            lockdep_is_held(&server->lock));
+       read_unlock(&server->lock);
+
+       atomic_set(&server->probe_outstanding, ac.alist->nr_addrs);
+       memset(&server->probe, 0, sizeof(server->probe));
+       server->probe.rtt = UINT_MAX;
+
+       for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
+               ret = afs_vl_get_capabilities(net, &ac, key, server,
+                                             server_index, true);
+               if (ret != -EINPROGRESS) {
+                       afs_vl_probe_done(server);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Send off probes to all unprobed servers.
+ */
+int afs_send_vl_probes(struct afs_net *net, struct key *key,
+                      struct afs_vlserver_list *vllist)
+{
+       struct afs_vlserver *server;
+       int i, ret;
+
+       for (i = 0; i < vllist->nr_servers; i++) {
+               server = vllist->servers[i].server;
+               if (test_bit(AFS_VLSERVER_FL_PROBED, &server->flags))
+                       continue;
+
+               if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags)) {
+                       ret = afs_do_probe_vlserver(net, server, key, i);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Wait for the first as-yet untried server to respond.
+ */
+int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist,
+                          unsigned long untried)
+{
+       struct wait_queue_entry *waits;
+       struct afs_vlserver *server;
+       unsigned int rtt = UINT_MAX;
+       bool have_responders = false;
+       int pref = -1, i;
+
+       _enter("%u,%lx", vllist->nr_servers, untried);
+
+       /* Only wait for servers that have a probe outstanding. */
+       for (i = 0; i < vllist->nr_servers; i++) {
+               if (test_bit(i, &untried)) {
+                       server = vllist->servers[i].server;
+                       if (!test_bit(AFS_VLSERVER_FL_PROBING, &server->flags))
+                               __clear_bit(i, &untried);
+                       if (server->probe.responded)
+                               have_responders = true;
+               }
+       }
+       if (have_responders || !untried)
+               return 0;
+
+       waits = kmalloc(array_size(vllist->nr_servers, sizeof(*waits)), GFP_KERNEL);
+       if (!waits)
+               return -ENOMEM;
+
+       for (i = 0; i < vllist->nr_servers; i++) {
+               if (test_bit(i, &untried)) {
+                       server = vllist->servers[i].server;
+                       init_waitqueue_entry(&waits[i], current);
+                       add_wait_queue(&server->probe_wq, &waits[i]);
+               }
+       }
+
+       for (;;) {
+               bool still_probing = false;
+
+               set_current_state(TASK_INTERRUPTIBLE);
+               for (i = 0; i < vllist->nr_servers; i++) {
+                       if (test_bit(i, &untried)) {
+                               server = vllist->servers[i].server;
+                               if (server->probe.responded)
+                                       goto stop;
+                               if (test_bit(AFS_VLSERVER_FL_PROBING, &server->flags))
+                                       still_probing = true;
+                       }
+               }
+
+               if (!still_probing || unlikely(signal_pending(current)))
+                       goto stop;
+               schedule();
+       }
+
+stop:
+       set_current_state(TASK_RUNNING);
+
+       for (i = 0; i < vllist->nr_servers; i++) {
+               if (test_bit(i, &untried)) {
+                       server = vllist->servers[i].server;
+                       if (server->probe.responded &&
+                           server->probe.rtt < rtt) {
+                               pref = i;
+                               rtt = server->probe.rtt;
+                       }
+
+                       remove_wait_queue(&server->probe_wq, &waits[i]);
+               }
+       }
+
+       kfree(waits);
+
+       if (pref == -1 && signal_pending(current))
+               return -ERESTARTSYS;
+
+       if (pref >= 0)
+               vllist->preferred = pref;
+
+       _leave(" = 0 [%u]", pref);
+       return 0;
+}
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
new file mode 100644 (file)
index 0000000..b64a284
--- /dev/null
@@ -0,0 +1,355 @@
+/* Handle vlserver selection and rotation.
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include "internal.h"
+#include "afs_vl.h"
+
+/*
+ * Begin an operation on a volume location server.
+ */
+bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell,
+                                 struct key *key)
+{
+       memset(vc, 0, sizeof(*vc));
+       vc->cell = cell;
+       vc->key = key;
+       vc->error = -EDESTADDRREQ;
+       vc->ac.error = SHRT_MAX;
+
+       if (signal_pending(current)) {
+               vc->error = -EINTR;
+               vc->flags |= AFS_VL_CURSOR_STOP;
+               return false;
+       }
+
+       return true;
+}
+
+/*
+ * Begin iteration through a server list, starting with the last used server if
+ * possible, or the last recorded good server if not.
+ */
+static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
+{
+       struct afs_cell *cell = vc->cell;
+
+       if (wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET,
+                       TASK_INTERRUPTIBLE)) {
+               vc->error = -ERESTARTSYS;
+               return false;
+       }
+
+       read_lock(&cell->vl_servers_lock);
+       vc->server_list = afs_get_vlserverlist(
+               rcu_dereference_protected(cell->vl_servers,
+                                         lockdep_is_held(&cell->vl_servers_lock)));
+       read_unlock(&cell->vl_servers_lock);
+       if (!vc->server_list || !vc->server_list->nr_servers)
+               return false;
+
+       vc->untried = (1UL << vc->server_list->nr_servers) - 1;
+       vc->index = -1;
+       return true;
+}
+
+/*
+ * Select the vlserver to use.  May be called multiple times to rotate
+ * through the vlservers.
+ */
+bool afs_select_vlserver(struct afs_vl_cursor *vc)
+{
+       struct afs_addr_list *alist;
+       struct afs_vlserver *vlserver;
+       u32 rtt;
+       int error = vc->ac.error, abort_code, i;
+
+       _enter("%lx[%d],%lx[%d],%d,%d",
+              vc->untried, vc->index,
+              vc->ac.tried, vc->ac.index,
+              error, vc->ac.abort_code);
+
+       if (vc->flags & AFS_VL_CURSOR_STOP) {
+               _leave(" = f [stopped]");
+               return false;
+       }
+
+       vc->nr_iterations++;
+
+       /* Evaluate the result of the previous operation, if there was one. */
+       switch (error) {
+       case SHRT_MAX:
+               goto start;
+
+       default:
+       case 0:
+               /* Success or local failure.  Stop. */
+               vc->error = error;
+               vc->flags |= AFS_VL_CURSOR_STOP;
+               _leave(" = f [okay/local %d]", vc->ac.error);
+               return false;
+
+       case -ECONNABORTED:
+               /* The far side rejected the operation on some grounds.  This
+                * might involve the server being busy or the volume having been moved.
+                */
+               switch (vc->ac.abort_code) {
+               case AFSVL_IO:
+               case AFSVL_BADVOLOPER:
+               case AFSVL_NOMEM:
+                       /* The server went weird. */
+                       vc->error = -EREMOTEIO;
+                       //write_lock(&vc->cell->vl_servers_lock);
+                       //vc->server_list->weird_mask |= 1 << vc->index;
+                       //write_unlock(&vc->cell->vl_servers_lock);
+                       goto next_server;
+
+               default:
+                       vc->error = afs_abort_to_error(vc->ac.abort_code);
+                       goto failed;
+               }
+
+       case -ENETUNREACH:
+       case -EHOSTUNREACH:
+       case -ECONNREFUSED:
+       case -ETIMEDOUT:
+       case -ETIME:
+               _debug("no conn %d", error);
+               vc->error = error;
+               goto iterate_address;
+
+       case -ECONNRESET:
+               _debug("call reset");
+               vc->error = error;
+               vc->flags |= AFS_VL_CURSOR_RETRY;
+               goto next_server;
+       }
+
+restart_from_beginning:
+       _debug("restart");
+       afs_end_cursor(&vc->ac);
+       afs_put_vlserverlist(vc->cell->net, vc->server_list);
+       vc->server_list = NULL;
+       if (vc->flags & AFS_VL_CURSOR_RETRIED)
+               goto failed;
+       vc->flags |= AFS_VL_CURSOR_RETRIED;
+start:
+       _debug("start");
+
+       if (!afs_start_vl_iteration(vc))
+               goto failed;
+
+       error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list);
+       if (error < 0)
+               goto failed_set_error;
+
+pick_server:
+       _debug("pick [%lx]", vc->untried);
+
+       error = afs_wait_for_vl_probes(vc->server_list, vc->untried);
+       if (error < 0)
+               goto failed_set_error;
+
+       /* Pick the untried server with the lowest RTT. */
+       vc->index = vc->server_list->preferred;
+       if (test_bit(vc->index, &vc->untried))
+               goto selected_server;
+
+       vc->index = -1;
+       rtt = U32_MAX;
+       for (i = 0; i < vc->server_list->nr_servers; i++) {
+               struct afs_vlserver *s = vc->server_list->servers[i].server;
+
+               if (!test_bit(i, &vc->untried) || !s->probe.responded)
+                       continue;
+               if (s->probe.rtt < rtt) {
+                       vc->index = i;
+                       rtt = s->probe.rtt;
+               }
+       }
+
+       if (vc->index == -1)
+               goto no_more_servers;
+
+selected_server:
+       _debug("use %d", vc->index);
+       __clear_bit(vc->index, &vc->untried);
+
+       /* We're starting on a different vlserver from the list.  We need to
+        * check it, find its address list and probe its capabilities before we
+        * use it.
+        */
+       ASSERTCMP(vc->ac.alist, ==, NULL);
+       vlserver = vc->server_list->servers[vc->index].server;
+       vc->server = vlserver;
+
+       _debug("USING VLSERVER: %s", vlserver->name);
+
+       read_lock(&vlserver->lock);
+       alist = rcu_dereference_protected(vlserver->addresses,
+                                         lockdep_is_held(&vlserver->lock));
+       afs_get_addrlist(alist);
+       read_unlock(&vlserver->lock);
+
+       memset(&vc->ac, 0, sizeof(vc->ac));
+
+       if (!vc->ac.alist)
+               vc->ac.alist = alist;
+       else
+               afs_put_addrlist(alist);
+
+       vc->ac.index = -1;
+
+iterate_address:
+       ASSERT(vc->ac.alist);
+       /* Iterate over the current server's address list to try and find an
+        * address on which it will respond to us.
+        */
+       if (!afs_iterate_addresses(&vc->ac))
+               goto next_server;
+
+       _debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs);
+
+       _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport);
+       return true;
+
+next_server:
+       _debug("next");
+       afs_end_cursor(&vc->ac);
+       goto pick_server;
+
+no_more_servers:
+       /* That's all the servers poked to no good effect.  Try again if some
+        * of them were busy.
+        */
+       if (vc->flags & AFS_VL_CURSOR_RETRY)
+               goto restart_from_beginning;
+
+       abort_code = 0;
+       error = -EDESTADDRREQ;
+       for (i = 0; i < vc->server_list->nr_servers; i++) {
+               struct afs_vlserver *s = vc->server_list->servers[i].server;
+               int probe_error = READ_ONCE(s->probe.error);
+
+               switch (probe_error) {
+               case 0:
+                       continue;
+               default:
+                       if (error == -ETIMEDOUT ||
+                           error == -ETIME)
+                               continue;
+               case -ETIMEDOUT:
+               case -ETIME:
+                       if (error == -ENOMEM ||
+                           error == -ENONET)
+                               continue;
+               case -ENOMEM:
+               case -ENONET:
+                       if (error == -ENETUNREACH)
+                               continue;
+               case -ENETUNREACH:
+                       if (error == -EHOSTUNREACH)
+                               continue;
+               case -EHOSTUNREACH:
+                       if (error == -ECONNREFUSED)
+                               continue;
+               case -ECONNREFUSED:
+                       if (error == -ECONNRESET)
+                               continue;
+               case -ECONNRESET: /* Responded, but call expired. */
+                       if (error == -ECONNABORTED)
+                               continue;
+               case -ECONNABORTED:
+                       abort_code = s->probe.abort_code;
+                       error = probe_error;
+                       continue;
+               }
+       }
+
+       if (error == -ECONNABORTED)
+               error = afs_abort_to_error(abort_code);
+
+failed_set_error:
+       vc->error = error;
+failed:
+       vc->flags |= AFS_VL_CURSOR_STOP;
+       afs_end_cursor(&vc->ac);
+       _leave(" = f [failed %d]", vc->error);
+       return false;
+}
+
+/*
+ * Dump cursor state in the case of the error being EDESTADDRREQ.
+ */
+static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
+{
+       static int count;
+       int i;
+
+       if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
+               return;
+       count++;
+
+       rcu_read_lock();
+       pr_notice("EDESTADDR occurred\n");
+       pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
+                 vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error);
+
+       if (vc->server_list) {
+               const struct afs_vlserver_list *sl = vc->server_list;
+               pr_notice("VC: SL nr=%u ix=%u\n",
+                         sl->nr_servers, sl->index);
+               for (i = 0; i < sl->nr_servers; i++) {
+                       const struct afs_vlserver *s = sl->servers[i].server;
+                       pr_notice("VC: server %s+%hu fl=%lx E=%hd\n",
+                                 s->name, s->port, s->flags, s->probe.error);
+                       if (s->addresses) {
+                               const struct afs_addr_list *a =
+                                       rcu_dereference(s->addresses);
+                               pr_notice("VC:  - nr=%u/%u/%u pf=%u\n",
+                                         a->nr_ipv4, a->nr_addrs, a->max_addrs,
+                                         a->preferred);
+                               pr_notice("VC:  - pr=%lx R=%lx F=%lx\n",
+                                         a->probed, a->responded, a->failed);
+                               if (a == vc->ac.alist)
+                                       pr_notice("VC:  - current\n");
+                       }
+               }
+       }
+
+       pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
+                 vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error,
+                 vc->ac.responded, vc->ac.nr_iterations);
+       rcu_read_unlock();
+}
+
+/*
+ * Tidy up a volume location server cursor and unlock the vnode.
+ */
+int afs_end_vlserver_operation(struct afs_vl_cursor *vc)
+{
+       struct afs_net *net = vc->cell->net;
+
+       if (vc->error == -EDESTADDRREQ ||
+           vc->error == -ENETUNREACH ||
+           vc->error == -EHOSTUNREACH)
+               afs_vl_dump_edestaddrreq(vc);
+
+       afs_end_cursor(&vc->ac);
+       afs_put_vlserverlist(net, vc->server_list);
+
+       if (vc->error == -ECONNABORTED)
+               vc->error = afs_abort_to_error(vc->ac.abort_code);
+
+       return vc->error;
+}
index c3b740813fc719850ca188f892d4f653352e8600..c3d9e5a5f67eeb13670b372b04f5b5dc4241282d 100644 (file)
@@ -128,14 +128,13 @@ static const struct afs_call_type afs_RXVLGetEntryByNameU = {
  * Dispatch a get volume entry by name or ID operation (uuid variant).  If the
  * volname is a decimal number then it's a volume ID not a volume name.
  */
-struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net,
-                                                 struct afs_addr_cursor *ac,
-                                                 struct key *key,
+struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc,
                                                  const char *volname,
                                                  int volnamesz)
 {
        struct afs_vldb_entry *entry;
        struct afs_call *call;
+       struct afs_net *net = vc->cell->net;
        size_t reqsz, padsz;
        __be32 *bp;
 
@@ -155,7 +154,7 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net,
                return ERR_PTR(-ENOMEM);
        }
 
-       call->key = key;
+       call->key = vc->key;
        call->reply[0] = entry;
        call->ret_reply0 = true;
 
@@ -168,7 +167,7 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net,
                memset((void *)bp + volnamesz, 0, padsz);
 
        trace_afs_make_vl_call(call);
-       return (struct afs_vldb_entry *)afs_make_call(ac, call, GFP_KERNEL, false);
+       return (struct afs_vldb_entry *)afs_make_call(&vc->ac, call, GFP_KERNEL, false);
 }
 
 /*
@@ -187,19 +186,18 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
        u32 uniquifier, nentries, count;
        int i, ret;
 
-       _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+       _enter("{%u,%zu/%u}",
+              call->unmarshall, iov_iter_count(call->_iter), call->count);
 
-again:
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
+               afs_extract_to_buf(call,
+                                  sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32));
                call->unmarshall++;
 
                /* Extract the returned uuid, uniquifier, nentries and blkaddrs size */
        case 1:
-               ret = afs_extract_data(call, call->buffer,
-                                      sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32),
-                                      true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -216,28 +214,28 @@ again:
                call->reply[0] = alist;
                call->count = count;
                call->count2 = nentries;
-               call->offset = 0;
                call->unmarshall++;
 
+       more_entries:
+               count = min(call->count, 4U);
+               afs_extract_to_buf(call, count * sizeof(__be32));
+
                /* Extract entries */
        case 2:
-               count = min(call->count, 4U);
-               ret = afs_extract_data(call, call->buffer,
-                                      count * sizeof(__be32),
-                                      call->count > 4);
+               ret = afs_extract_data(call, call->count > 4);
                if (ret < 0)
                        return ret;
 
                alist = call->reply[0];
                bp = call->buffer;
+               count = min(call->count, 4U);
                for (i = 0; i < count; i++)
                        if (alist->nr_addrs < call->count2)
                                afs_merge_fs_addr4(alist, *bp++, AFS_FS_PORT);
 
                call->count -= count;
                if (call->count > 0)
-                       goto again;
-               call->offset = 0;
+                       goto more_entries;
                call->unmarshall++;
                break;
        }
@@ -267,14 +265,13 @@ static const struct afs_call_type afs_RXVLGetAddrsU = {
  * Dispatch an operation to get the addresses for a server, where the server is
  * nominated by UUID.
  */
-struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net,
-                                        struct afs_addr_cursor *ac,
-                                        struct key *key,
+struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc,
                                         const uuid_t *uuid)
 {
        struct afs_ListAddrByAttributes__xdr *r;
        const struct afs_uuid *u = (const struct afs_uuid *)uuid;
        struct afs_call *call;
+       struct afs_net *net = vc->cell->net;
        __be32 *bp;
        int i;
 
@@ -286,7 +283,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net,
        if (!call)
                return ERR_PTR(-ENOMEM);
 
-       call->key = key;
+       call->key = vc->key;
        call->reply[0] = NULL;
        call->ret_reply0 = true;
 
@@ -307,7 +304,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net,
                r->uuid.node[i] = htonl(u->node[i]);
 
        trace_afs_make_vl_call(call);
-       return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
+       return (struct afs_addr_list *)afs_make_call(&vc->ac, call, GFP_KERNEL, false);
 }
 
 /*
@@ -318,54 +315,51 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call)
        u32 count;
        int ret;
 
-       _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+       _enter("{%u,%zu/%u}",
+              call->unmarshall, iov_iter_count(call->_iter), call->count);
 
-again:
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
 
                /* Extract the capabilities word count */
        case 1:
-               ret = afs_extract_data(call, &call->tmp,
-                                      1 * sizeof(__be32),
-                                      true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                count = ntohl(call->tmp);
-
                call->count = count;
                call->count2 = count;
-               call->offset = 0;
+
                call->unmarshall++;
+               afs_extract_discard(call, count * sizeof(__be32));
 
                /* Extract capabilities words */
        case 2:
-               count = min(call->count, 16U);
-               ret = afs_extract_data(call, call->buffer,
-                                      count * sizeof(__be32),
-                                      call->count > 16);
+               ret = afs_extract_data(call, false);
                if (ret < 0)
                        return ret;
 
                /* TODO: Examine capabilities */
 
-               call->count -= count;
-               if (call->count > 0)
-                       goto again;
-               call->offset = 0;
                call->unmarshall++;
                break;
        }
 
-       call->reply[0] = (void *)(unsigned long)call->service_id;
-
        _leave(" = 0 [done]");
        return 0;
 }
 
+static void afs_destroy_vl_get_capabilities(struct afs_call *call)
+{
+       struct afs_vlserver *server = call->reply[0];
+
+       afs_put_vlserver(call->net, server);
+       afs_flat_call_destructor(call);
+}
+
 /*
  * VL.GetCapabilities operation type
  */
@@ -373,11 +367,12 @@ static const struct afs_call_type afs_RXVLGetCapabilities = {
        .name           = "VL.GetCapabilities",
        .op             = afs_VL_GetCapabilities,
        .deliver        = afs_deliver_vl_get_capabilities,
-       .destructor     = afs_flat_call_destructor,
+       .done           = afs_vlserver_probe_result,
+       .destructor     = afs_destroy_vl_get_capabilities,
 };
 
 /*
- * Probe a fileserver for the capabilities that it supports.  This can
+ * Probe a volume server for the capabilities that it supports.  This can
  * return up to 196 words.
  *
  * We use this to probe for service upgrade to determine what the server at the
@@ -385,7 +380,10 @@ static const struct afs_call_type afs_RXVLGetCapabilities = {
  */
 int afs_vl_get_capabilities(struct afs_net *net,
                            struct afs_addr_cursor *ac,
-                           struct key *key)
+                           struct key *key,
+                           struct afs_vlserver *server,
+                           unsigned int server_index,
+                           bool async)
 {
        struct afs_call *call;
        __be32 *bp;
@@ -397,9 +395,10 @@ int afs_vl_get_capabilities(struct afs_net *net,
                return -ENOMEM;
 
        call->key = key;
-       call->upgrade = true; /* Let's see if this is a YFS server */
-       call->reply[0] = (void *)VLGETCAPABILITIES;
-       call->ret_reply0 = true;
+       call->reply[0] = afs_get_vlserver(server);
+       call->reply[1] = (void *)(long)server_index;
+       call->upgrade = true;
+       call->want_reply_time = true;
 
        /* marshall the parameters */
        bp = call->request;
@@ -407,7 +406,7 @@ int afs_vl_get_capabilities(struct afs_net *net,
 
        /* Can't take a ref on server */
        trace_afs_make_vl_call(call);
-       return afs_make_call(ac, call, GFP_KERNEL, false);
+       return afs_make_call(ac, call, GFP_KERNEL, async);
 }
 
 /*
@@ -426,22 +425,19 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
        u32 uniquifier, size;
        int ret;
 
-       _enter("{%u,%zu/%u,%u}", call->unmarshall, call->offset, call->count, call->count2);
+       _enter("{%u,%zu,%u}",
+              call->unmarshall, iov_iter_count(call->_iter), call->count2);
 
-again:
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
+               afs_extract_to_buf(call, sizeof(uuid_t) + 3 * sizeof(__be32));
                call->unmarshall = 1;
 
                /* Extract the returned uuid, uniquifier, fsEndpoints count and
                 * either the first fsEndpoint type or the volEndpoints
                 * count if there are no fsEndpoints. */
        case 1:
-               ret = afs_extract_data(call, call->buffer,
-                                      sizeof(uuid_t) +
-                                      3 * sizeof(__be32),
-                                      true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -451,22 +447,19 @@ again:
                call->count2    = ntohl(*bp); /* Type or next count */
 
                if (call->count > YFS_MAXENDPOINTS)
-                       return afs_protocol_error(call, -EBADMSG);
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_yvl_fsendpt_num);
 
                alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT);
                if (!alist)
                        return -ENOMEM;
                alist->version = uniquifier;
                call->reply[0] = alist;
-               call->offset = 0;
 
                if (call->count == 0)
                        goto extract_volendpoints;
 
-               call->unmarshall = 2;
-
-               /* Extract fsEndpoints[] entries */
-       case 2:
+       next_fsendpoint:
                switch (call->count2) {
                case YFS_ENDPOINT_IPV4:
                        size = sizeof(__be32) * (1 + 1 + 1);
@@ -475,11 +468,17 @@ again:
                        size = sizeof(__be32) * (1 + 4 + 1);
                        break;
                default:
-                       return afs_protocol_error(call, -EBADMSG);
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_yvl_fsendpt_type);
                }
 
                size += sizeof(__be32);
-               ret = afs_extract_data(call, call->buffer, size, true);
+               afs_extract_to_buf(call, size);
+               call->unmarshall = 2;
+
+               /* Extract fsEndpoints[] entries */
+       case 2:
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -488,18 +487,21 @@ again:
                switch (call->count2) {
                case YFS_ENDPOINT_IPV4:
                        if (ntohl(bp[0]) != sizeof(__be32) * 2)
-                               return afs_protocol_error(call, -EBADMSG);
+                               return afs_protocol_error(call, -EBADMSG,
+                                                         afs_eproto_yvl_fsendpt4_len);
                        afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2]));
                        bp += 3;
                        break;
                case YFS_ENDPOINT_IPV6:
                        if (ntohl(bp[0]) != sizeof(__be32) * 5)
-                               return afs_protocol_error(call, -EBADMSG);
+                               return afs_protocol_error(call, -EBADMSG,
+                                                         afs_eproto_yvl_fsendpt6_len);
                        afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5]));
                        bp += 6;
                        break;
                default:
-                       return afs_protocol_error(call, -EBADMSG);
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_yvl_fsendpt_type);
                }
 
                /* Got either the type of the next entry or the count of
@@ -507,10 +509,9 @@ again:
                 */
                call->count2 = ntohl(*bp++);
 
-               call->offset = 0;
                call->count--;
                if (call->count > 0)
-                       goto again;
+                       goto next_fsendpoint;
 
        extract_volendpoints:
                /* Extract the list of volEndpoints. */
@@ -518,8 +519,10 @@ again:
                if (!call->count)
                        goto end;
                if (call->count > YFS_MAXENDPOINTS)
-                       return afs_protocol_error(call, -EBADMSG);
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_yvl_vlendpt_type);
 
+               afs_extract_to_buf(call, 1 * sizeof(__be32));
                call->unmarshall = 3;
 
                /* Extract the type of volEndpoints[0].  Normally we would
@@ -527,17 +530,14 @@ again:
                 * data of the current one, but this is the first...
                 */
        case 3:
-               ret = afs_extract_data(call, call->buffer, sizeof(__be32), true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                bp = call->buffer;
-               call->count2 = ntohl(*bp++);
-               call->offset = 0;
-               call->unmarshall = 4;
 
-               /* Extract volEndpoints[] entries */
-       case 4:
+       next_volendpoint:
+               call->count2 = ntohl(*bp++);
                switch (call->count2) {
                case YFS_ENDPOINT_IPV4:
                        size = sizeof(__be32) * (1 + 1 + 1);
@@ -546,12 +546,18 @@ again:
                        size = sizeof(__be32) * (1 + 4 + 1);
                        break;
                default:
-                       return afs_protocol_error(call, -EBADMSG);
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_yvl_vlendpt_type);
                }
 
                if (call->count > 1)
-                       size += sizeof(__be32);
-               ret = afs_extract_data(call, call->buffer, size, true);
+                       size += sizeof(__be32); /* Get next type too */
+               afs_extract_to_buf(call, size);
+               call->unmarshall = 4;
+
+               /* Extract volEndpoints[] entries */
+       case 4:
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -559,34 +565,35 @@ again:
                switch (call->count2) {
                case YFS_ENDPOINT_IPV4:
                        if (ntohl(bp[0]) != sizeof(__be32) * 2)
-                               return afs_protocol_error(call, -EBADMSG);
+                               return afs_protocol_error(call, -EBADMSG,
+                                                         afs_eproto_yvl_vlendpt4_len);
                        bp += 3;
                        break;
                case YFS_ENDPOINT_IPV6:
                        if (ntohl(bp[0]) != sizeof(__be32) * 5)
-                               return afs_protocol_error(call, -EBADMSG);
+                               return afs_protocol_error(call, -EBADMSG,
+                                                         afs_eproto_yvl_vlendpt6_len);
                        bp += 6;
                        break;
                default:
-                       return afs_protocol_error(call, -EBADMSG);
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_yvl_vlendpt_type);
                }
 
                /* Got either the type of the next entry or the count of
                 * volEndpoints if no more fsEndpoints.
                 */
-               call->offset = 0;
                call->count--;
-               if (call->count > 0) {
-                       call->count2 = ntohl(*bp++);
-                       goto again;
-               }
+               if (call->count > 0)
+                       goto next_volendpoint;
 
        end:
+               afs_extract_discard(call, 0);
                call->unmarshall = 5;
 
                /* Done */
        case 5:
-               ret = afs_extract_data(call, call->buffer, 0, false);
+               ret = afs_extract_data(call, false);
                if (ret < 0)
                        return ret;
                call->unmarshall = 6;
@@ -596,11 +603,6 @@ again:
        }
 
        alist = call->reply[0];
-
-       /* Start with IPv6 if available. */
-       if (alist->nr_ipv4 < alist->nr_addrs)
-               alist->index = alist->nr_ipv4;
-
        _leave(" = 0 [done]");
        return 0;
 }
@@ -619,12 +621,11 @@ static const struct afs_call_type afs_YFSVLGetEndpoints = {
  * Dispatch an operation to get the addresses for a server, where the server is
  * nominated by UUID.
  */
-struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net,
-                                             struct afs_addr_cursor *ac,
-                                             struct key *key,
+struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc,
                                              const uuid_t *uuid)
 {
        struct afs_call *call;
+       struct afs_net *net = vc->cell->net;
        __be32 *bp;
 
        _enter("");
@@ -635,7 +636,7 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net,
        if (!call)
                return ERR_PTR(-ENOMEM);
 
-       call->key = key;
+       call->key = vc->key;
        call->reply[0] = NULL;
        call->ret_reply0 = true;
 
@@ -646,5 +647,5 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net,
        memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */
 
        trace_afs_make_vl_call(call);
-       return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
+       return (struct afs_addr_list *)afs_make_call(&vc->ac, call, GFP_KERNEL, false);
 }
index 3037bd01f617d13b1589d823cb6bdc112014bdca..00975ed3640f8ae535d7438d2076ef681434f378 100644 (file)
@@ -74,55 +74,19 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
                                                 const char *volname,
                                                 size_t volnamesz)
 {
-       struct afs_addr_cursor ac;
-       struct afs_vldb_entry *vldb;
+       struct afs_vldb_entry *vldb = ERR_PTR(-EDESTADDRREQ);
+       struct afs_vl_cursor vc;
        int ret;
 
-       ret = afs_set_vl_cursor(&ac, cell);
-       if (ret < 0)
-               return ERR_PTR(ret);
-
-       while (afs_iterate_addresses(&ac)) {
-               if (!test_bit(ac.index, &ac.alist->probed)) {
-                       ret = afs_vl_get_capabilities(cell->net, &ac, key);
-                       switch (ret) {
-                       case VL_SERVICE:
-                               clear_bit(ac.index, &ac.alist->yfs);
-                               set_bit(ac.index, &ac.alist->probed);
-                               ac.addr->srx_service = ret;
-                               break;
-                       case YFS_VL_SERVICE:
-                               set_bit(ac.index, &ac.alist->yfs);
-                               set_bit(ac.index, &ac.alist->probed);
-                               ac.addr->srx_service = ret;
-                               break;
-                       }
-               }
-               
-               vldb = afs_vl_get_entry_by_name_u(cell->net, &ac, key,
-                                                 volname, volnamesz);
-               switch (ac.error) {
-               case 0:
-                       afs_end_cursor(&ac);
-                       return vldb;
-               case -ECONNABORTED:
-                       ac.error = afs_abort_to_error(ac.abort_code);
-                       goto error;
-               case -ENOMEM:
-               case -ENONET:
-                       goto error;
-               case -ENETUNREACH:
-               case -EHOSTUNREACH:
-               case -ECONNREFUSED:
-                       break;
-               default:
-                       ac.error = -EIO;
-                       goto error;
-               }
+       if (!afs_begin_vlserver_operation(&vc, cell, key))
+               return ERR_PTR(-ERESTARTSYS);
+
+       while (afs_select_vlserver(&vc)) {
+               vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz);
        }
 
-error:
-       return ERR_PTR(afs_end_cursor(&ac));
+       ret = afs_end_vlserver_operation(&vc);
+       return ret < 0 ? ERR_PTR(ret) : vldb;
 }
 
 /*
@@ -270,7 +234,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
        /* We look up an ID by passing it as a decimal string in the
         * operation's name parameter.
         */
-       idsz = sprintf(idbuf, "%u", volume->vid);
+       idsz = sprintf(idbuf, "%llu", volume->vid);
 
        vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
        if (IS_ERR(vldb)) {
index 19c04caf3c012bd777d20e31355357154cc38a83..72efcfcf9f95efd2b5cae1257a8d01247367ebeb 100644 (file)
@@ -33,10 +33,21 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
                         loff_t pos, unsigned int len, struct page *page)
 {
        struct afs_read *req;
+       size_t p;
+       void *data;
        int ret;
 
        _enter(",,%llu", (unsigned long long)pos);
 
+       if (pos >= vnode->vfs_inode.i_size) {
+               p = pos & ~PAGE_MASK;
+               ASSERTCMP(p + len, <=, PAGE_SIZE);
+               data = kmap(page);
+               memset(data + p, 0, len);
+               kunmap(page);
+               return 0;
+       }
+
        req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *),
                      GFP_KERNEL);
        if (!req)
@@ -81,7 +92,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
        pgoff_t index = pos >> PAGE_SHIFT;
        int ret;
 
-       _enter("{%x:%u},{%lx},%u,%u",
+       _enter("{%llx:%llu},{%lx},%u,%u",
               vnode->fid.vid, vnode->fid.vnode, index, from, to);
 
        /* We want to store information about how much of a page is altered in
@@ -181,7 +192,7 @@ int afs_write_end(struct file *file, struct address_space *mapping,
        loff_t i_size, maybe_i_size;
        int ret;
 
-       _enter("{%x:%u},{%lx}",
+       _enter("{%llx:%llu},{%lx}",
               vnode->fid.vid, vnode->fid.vnode, page->index);
 
        maybe_i_size = pos + copied;
@@ -230,7 +241,7 @@ static void afs_kill_pages(struct address_space *mapping,
        struct pagevec pv;
        unsigned count, loop;
 
-       _enter("{%x:%u},%lx-%lx",
+       _enter("{%llx:%llu},%lx-%lx",
               vnode->fid.vid, vnode->fid.vnode, first, last);
 
        pagevec_init(&pv);
@@ -272,7 +283,7 @@ static void afs_redirty_pages(struct writeback_control *wbc,
        struct pagevec pv;
        unsigned count, loop;
 
-       _enter("{%x:%u},%lx-%lx",
+       _enter("{%llx:%llu},%lx-%lx",
               vnode->fid.vid, vnode->fid.vnode, first, last);
 
        pagevec_init(&pv);
@@ -314,7 +325,7 @@ static int afs_store_data(struct address_space *mapping,
        struct list_head *p;
        int ret = -ENOKEY, ret2;
 
-       _enter("%s{%x:%u.%u},%lx,%lx,%x,%x",
+       _enter("%s{%llx:%llu.%u},%lx,%lx,%x,%x",
               vnode->volume->name,
               vnode->fid.vid,
               vnode->fid.vnode,
@@ -533,6 +544,7 @@ no_more:
        case -ENOENT:
        case -ENOMEDIUM:
        case -ENXIO:
+               trace_afs_file_error(vnode, ret, afs_file_error_writeback_fail);
                afs_kill_pages(mapping, first, last);
                mapping_set_error(mapping, ret);
                break;
@@ -675,7 +687,7 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
        unsigned count, loop;
        pgoff_t first = call->first, last = call->last;
 
-       _enter("{%x:%u},{%lx-%lx}",
+       _enter("{%llx:%llu},{%lx-%lx}",
               vnode->fid.vid, vnode->fid.vnode, first, last);
 
        pagevec_init(&pv);
@@ -714,7 +726,7 @@ ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
        ssize_t result;
        size_t count = iov_iter_count(from);
 
-       _enter("{%x.%u},{%zu},",
+       _enter("{%llx:%llu},{%zu},",
               vnode->fid.vid, vnode->fid.vnode, count);
 
        if (IS_SWAPFILE(&vnode->vfs_inode)) {
@@ -742,7 +754,7 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        struct inode *inode = file_inode(file);
        struct afs_vnode *vnode = AFS_FS_I(inode);
 
-       _enter("{%x:%u},{n=%pD},%d",
+       _enter("{%llx:%llu},{n=%pD},%d",
               vnode->fid.vid, vnode->fid.vnode, file,
               datasync);
 
@@ -760,7 +772,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
        struct afs_vnode *vnode = AFS_FS_I(inode);
        unsigned long priv;
 
-       _enter("{{%x:%u}},{%lx}",
+       _enter("{{%llx:%llu}},{%lx}",
               vnode->fid.vid, vnode->fid.vnode, vmf->page->index);
 
        sb_start_pagefault(inode->i_sb);
index cfcc674e64a55bc52be024d873a5b70fa85fafc1..a2cdf25573e2419592b7460aafc7a6da019293a9 100644 (file)
@@ -72,7 +72,7 @@ static int afs_xattr_get_fid(const struct xattr_handler *handler,
        char text[8 + 1 + 8 + 1 + 8 + 1];
        size_t len;
 
-       len = sprintf(text, "%x:%x:%x",
+       len = sprintf(text, "%llx:%llx:%x",
                      vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
        if (size == 0)
                return len;
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
new file mode 100644 (file)
index 0000000..12658c1
--- /dev/null
@@ -0,0 +1,2184 @@
+/* YFS File Server client stubs
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/circ_buf.h>
+#include <linux/iversion.h>
+#include "internal.h"
+#include "afs_fs.h"
+#include "xdr_fs.h"
+#include "protocol_yfs.h"
+
+/* All-zeroes fid.  NOTE(review): no user visible in this chunk — confirm callers. */
+static const struct afs_fid afs_zero_fid;
+
+/*
+ * Attach the callback interest to the call via afs_get_cb_interest() so the
+ * interest is held for the duration of the call.
+ */
+static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi)
+{
+       call->cbi = afs_get_cb_interest(cbi);
+}
+
+/* Size, in __be32 words, of the XDR structure that x points to. */
+#define xdr_size(x) (sizeof(*x) / sizeof(__be32))
+
+/*
+ * Decode a YFSFid from *_bp into an afs_fid (64-bit volume and vnode numbers
+ * plus vnode-high word and uniquifier), advancing *_bp past the block.
+ */
+static void xdr_decode_YFSFid(const __be32 **_bp, struct afs_fid *fid)
+{
+       const struct yfs_xdr_YFSFid *x = (const void *)*_bp;
+
+       fid->vid        = xdr_to_u64(x->volume);
+       fid->vnode      = xdr_to_u64(x->vnode.lo);
+       fid->vnode_hi   = ntohl(x->vnode.hi);
+       fid->unique     = ntohl(x->vnode.unique);
+       *_bp += xdr_size(x);
+}
+
+/* Emit a 32-bit value in network byte order; returns the next free word. */
+static __be32 *xdr_encode_u32(__be32 *bp, u32 n)
+{
+       *bp++ = htonl(n);
+       return bp;
+}
+
+/* Emit a 64-bit value as a yfs_xdr_u64; returns the next free word. */
+static __be32 *xdr_encode_u64(__be32 *bp, u64 n)
+{
+       struct yfs_xdr_u64 *x = (void *)bp;
+
+       *x = u64_to_xdr(n);
+       return bp + xdr_size(x);
+}
+
+/* Marshal an afs_fid as a YFSFid block; returns the next free word. */
+static __be32 *xdr_encode_YFSFid(__be32 *bp, struct afs_fid *fid)
+{
+       struct yfs_xdr_YFSFid *x = (void *)bp;
+
+       x->volume       = u64_to_xdr(fid->vid);
+       x->vnode.lo     = u64_to_xdr(fid->vnode);
+       x->vnode.hi     = htonl(fid->vnode_hi);
+       x->vnode.unique = htonl(fid->unique);
+       return bp + xdr_size(x);
+}
+
+/*
+ * Wire size of an XDR counted string: a 32-bit length word plus the data
+ * rounded up to a 4-byte boundary.
+ */
+static size_t xdr_strlen(unsigned int len)
+{
+       return sizeof(__be32) + round_up(len, sizeof(__be32));
+}
+
+/*
+ * Marshal a counted string: 32-bit length, then the bytes, NUL-padded up to
+ * a 4-byte boundary.  Returns the next free word.
+ */
+static __be32 *xdr_encode_string(__be32 *bp, const char *p, unsigned int len)
+{
+       bp = xdr_encode_u32(bp, len);
+       bp = memcpy(bp, p, len);
+       if (len & 3) {
+               unsigned int pad = 4 - (len & 3);
+
+               memset((u8 *)bp + len, 0, pad);
+               len += pad;
+       }
+
+       return bp + len / sizeof(__be32);
+}
+
+/* Convert a timespec64 to the YFS wire format: 100ns intervals since the epoch. */
+static s64 linux_to_yfs_time(const struct timespec64 *t)
+{
+       /* Convert to 100ns intervals. */
+       return (u64)t->tv_sec * 10000000 + t->tv_nsec/100;
+}
+
+/*
+ * Marshal a YFSStoreStatus block that sets only the file mode (permission
+ * bits); all other fields are zeroed and excluded via the mask.
+ */
+static __be32 *xdr_encode_YFSStoreStatus_mode(__be32 *bp, mode_t mode)
+{
+       struct yfs_xdr_YFSStoreStatus *x = (void *)bp;
+
+       x->mask         = htonl(AFS_SET_MODE);
+       x->mode         = htonl(mode & S_IALLUGO);
+       x->mtime_client = u64_to_xdr(0);
+       x->owner        = u64_to_xdr(0);
+       x->group        = u64_to_xdr(0);
+       return bp + xdr_size(x);
+}
+
+/*
+ * Marshal a YFSStoreStatus block that sets only the client mtime (converted
+ * to YFS 100ns format); all other fields are zeroed and excluded via the mask.
+ */
+static __be32 *xdr_encode_YFSStoreStatus_mtime(__be32 *bp, const struct timespec64 *t)
+{
+       struct yfs_xdr_YFSStoreStatus *x = (void *)bp;
+       s64 mtime = linux_to_yfs_time(t);
+
+       x->mask         = htonl(AFS_SET_MTIME);
+       x->mode         = htonl(0);
+       x->mtime_client = u64_to_xdr(mtime);
+       x->owner        = u64_to_xdr(0);
+       x->group        = u64_to_xdr(0);
+       return bp + xdr_size(x);
+}
+
+/*
+ * Convert a signed 100ns-resolution 64-bit time into a timespec.  Negative
+ * (pre-epoch) values are handled by dividing the magnitude and then negating
+ * both fields, because do_div() only operates on unsigned values.
+ */
+static struct timespec64 yfs_time_to_linux(s64 t)
+{
+       struct timespec64 ts;
+       u64 abs_t;
+
+       /*
+        * Unfortunately can not use normal 64 bit division on 32 bit arch, but
+        * the alternative, do_div, does not work with negative numbers so have
+        * to special case them
+        */
+       if (t < 0) {
+               abs_t = -t;
+               ts.tv_nsec = (time64_t)(do_div(abs_t, 10000000) * 100);
+               ts.tv_nsec = -ts.tv_nsec;
+               ts.tv_sec = -abs_t;
+       } else {
+               abs_t = t;
+               ts.tv_nsec = (time64_t)do_div(abs_t, 10000000) * 100;
+               ts.tv_sec = abs_t;
+       }
+
+       return ts;
+}
+
+/* Decode a YFS wire time (100ns intervals, signed) into a timespec64. */
+static struct timespec64 xdr_to_time(const struct yfs_xdr_u64 xdr)
+{
+       s64 t = xdr_to_u64(xdr);
+
+       return yfs_time_to_linux(t);
+}
+
+/*
+ * Sanity-check that marshalling ended exactly at the end of the preallocated
+ * request buffer: an overflow (buffer overrun) is logged as an error, an
+ * underflow (oversized allocation) as a warning.
+ */
+static void yfs_check_req(struct afs_call *call, __be32 *bp)
+{
+       size_t len = (void *)bp - call->request;
+
+       if (len > call->request_size)
+               pr_err("kAFS: %s: Request buffer overflow (%zu>%u)\n",
+                      call->type->name, len, call->request_size);
+       else if (len < call->request_size)
+               pr_warning("kAFS: %s: Request buffer underflow (%zu<%u)\n",
+                          call->type->name, len, call->request_size);
+}
+
+/*
+ * Dump a bad file status record: the first 0x50 bytes as rows of four 32-bit
+ * words, plus one trailing word at offset 0x50.
+ */
+static void xdr_dump_bad(const __be32 *bp)
+{
+       __be32 x[4];
+       int i;
+
+       pr_notice("YFS XDR: Bad status record\n");
+       for (i = 0; i < 5 * 4 * 4; i += 16) {
+               memcpy(x, bp, 16);
+               bp += 4;
+               pr_notice("%03x: %08x %08x %08x %08x\n",
+                         i, ntohl(x[0]), ntohl(x[1]), ntohl(x[2]), ntohl(x[3]));
+       }
+
+       memcpy(x, bp, 4);
+       pr_notice("0x50: %08x\n", ntohl(x[0]));
+}
+
+/*
+ * Decode a YFSFetchStatus block into *status, advancing *_bp past it in all
+ * success cases.  If @vnode is given, the inode is updated from the decoded
+ * status (and marked deleted on a VNOVNODE abort).  Returns 0 on success or
+ * -EBADMSG via afs_protocol_error() if the record is malformed.
+ */
+static int xdr_decode_YFSFetchStatus(struct afs_call *call,
+                                    const __be32 **_bp,
+                                    struct afs_file_status *status,
+                                    struct afs_vnode *vnode,
+                                    const afs_dataversion_t *expected_version,
+                                    struct afs_read *read_req)
+{
+       const struct yfs_xdr_YFSFetchStatus *xdr = (const void *)*_bp;
+       u32 type;
+       u8 flags = 0;
+
+       status->abort_code = ntohl(xdr->abort_code);
+       if (status->abort_code != 0) {
+               if (vnode && status->abort_code == VNOVNODE) {
+                       set_bit(AFS_VNODE_DELETED, &vnode->flags);
+                       status->nlink = 0;
+                       __afs_break_callback(vnode);
+               }
+               /* Still consume the whole record so that any blocks following
+                * it in the reply decode from the right offset.
+                */
+               *_bp += xdr_size(xdr);
+               return 0;
+       }
+
+       type = ntohl(xdr->type);
+       switch (type) {
+       case AFS_FTYPE_FILE:
+       case AFS_FTYPE_DIR:
+       case AFS_FTYPE_SYMLINK:
+               /* A vnode changing type under us indicates a server problem,
+                * unless we simply haven't set the type yet.
+                */
+               if (type != status->type &&
+                   vnode &&
+                   !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
+                       pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n",
+                                  vnode->fid.vid,
+                                  vnode->fid.vnode,
+                                  vnode->fid.unique,
+                                  status->type, type);
+                       goto bad;
+               }
+               status->type = type;
+               break;
+       default:
+               goto bad;
+       }
+
+/* Extract a field, noting metadata (M) or data (D) changes in flags. */
+#define EXTRACT_M4(FIELD)                                      \
+       do {                                                    \
+               u32 x = ntohl(xdr->FIELD);                      \
+               if (status->FIELD != x) {                       \
+                       flags |= AFS_VNODE_META_CHANGED;        \
+                       status->FIELD = x;                      \
+               }                                               \
+       } while (0)
+
+#define EXTRACT_M8(FIELD)                                      \
+       do {                                                    \
+               u64 x = xdr_to_u64(xdr->FIELD);                 \
+               if (status->FIELD != x) {                       \
+                       flags |= AFS_VNODE_META_CHANGED;        \
+                       status->FIELD = x;                      \
+               }                                               \
+       } while (0)
+
+#define EXTRACT_D8(FIELD)                                      \
+       do {                                                    \
+               u64 x = xdr_to_u64(xdr->FIELD);                 \
+               if (status->FIELD != x) {                       \
+                       flags |= AFS_VNODE_DATA_CHANGED;        \
+                       status->FIELD = x;                      \
+               }                                               \
+       } while (0)
+
+       EXTRACT_M4(nlink);
+       EXTRACT_D8(size);
+       EXTRACT_D8(data_version);
+       EXTRACT_M8(author);
+       EXTRACT_M8(owner);
+       EXTRACT_M8(group);
+       EXTRACT_M4(mode);
+       EXTRACT_M4(caller_access); /* call ticket dependent */
+       EXTRACT_M4(anon_access);
+
+       status->mtime_client = xdr_to_time(xdr->mtime_client);
+       status->mtime_server = xdr_to_time(xdr->mtime_server);
+       status->lock_count   = ntohl(xdr->lock_count);
+
+       if (read_req) {
+               read_req->data_version = status->data_version;
+               read_req->file_size = status->size;
+       }
+
+       *_bp += xdr_size(xdr);
+
+       if (vnode) {
+               if (test_bit(AFS_VNODE_UNSET, &vnode->flags))
+                       flags |= AFS_VNODE_NOT_YET_SET;
+               afs_update_inode_from_status(vnode, status, expected_version,
+                                            flags);
+       }
+
+       return 0;
+
+bad:
+       xdr_dump_bad(*_bp);
+       return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
+}
+
+/*
+ * Decode the file status.  We need to lock the target vnode if we're going to
+ * update its status so that stat() sees the attributes update atomically.
+ * With no vnode to update, the decode proceeds lock-free.
+ */
+static int yfs_decode_status(struct afs_call *call,
+                            const __be32 **_bp,
+                            struct afs_file_status *status,
+                            struct afs_vnode *vnode,
+                            const afs_dataversion_t *expected_version,
+                            struct afs_read *read_req)
+{
+       int ret;
+
+       if (!vnode)
+               return xdr_decode_YFSFetchStatus(call, _bp, status, vnode,
+                                                expected_version, read_req);
+
+       write_seqlock(&vnode->cb_lock);
+       ret = xdr_decode_YFSFetchStatus(call, _bp, status, vnode,
+                                       expected_version, read_req);
+       write_sequnlock(&vnode->cb_lock);
+       return ret;
+}
+
+/*
+ * Decode a YFSCallBack block and apply the callback promise to the vnode,
+ * but only if the callback hasn't been broken since the cb_break snapshot
+ * was taken for this call.  The expiry time arrives in 100ns units and is
+ * converted to an absolute wallclock second count.  If the vnode's callback
+ * interest is replaced, the old interest is handed back through call->cbi
+ * for the caller to release.
+ */
+static void xdr_decode_YFSCallBack(struct afs_call *call,
+                                  struct afs_vnode *vnode,
+                                  const __be32 **_bp)
+{
+       struct yfs_xdr_YFSCallBack *xdr = (void *)*_bp;
+       struct afs_cb_interest *old, *cbi = call->cbi;
+       u64 cb_expiry;
+
+       write_seqlock(&vnode->cb_lock);
+
+       if (!afs_cb_is_broken(call->cb_break, vnode, cbi)) {
+               cb_expiry = xdr_to_u64(xdr->expiration_time);
+               do_div(cb_expiry, 10 * 1000 * 1000);
+               vnode->cb_version       = ntohl(xdr->version);
+               vnode->cb_type          = ntohl(xdr->type);
+               vnode->cb_expires_at    = cb_expiry + ktime_get_real_seconds();
+               old = vnode->cb_interest;
+               if (old != call->cbi) {
+                       /* Transfer our ref to the vnode; take over the old one. */
+                       vnode->cb_interest = cbi;
+                       cbi = old;
+               }
+               set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+       }
+
+       write_sequnlock(&vnode->cb_lock);
+       call->cbi = cbi;
+       *_bp += xdr_size(xdr);
+}
+
+/*
+ * Decode a YFSCallBack block into a bare afs_callback record without touching
+ * any vnode.  The 100ns-unit expiry is converted to absolute seconds.
+ */
+static void xdr_decode_YFSCallBack_raw(const __be32 **_bp,
+                                      struct afs_callback *cb)
+{
+       struct yfs_xdr_YFSCallBack *x = (void *)*_bp;
+       u64 cb_expiry;
+
+       cb_expiry = xdr_to_u64(x->expiration_time);
+       do_div(cb_expiry, 10 * 1000 * 1000);
+       cb->version     = ntohl(x->version);
+       cb->type        = ntohl(x->type);
+       cb->expires_at  = cb_expiry + ktime_get_real_seconds();
+
+       *_bp += xdr_size(x);
+}
+
+/*
+ * Decode a YFSVolSync block.  @volsync may be NULL if the caller doesn't want
+ * the result; the block is consumed from the buffer either way.  The creation
+ * date is converted from 100ns units to seconds.
+ */
+static void xdr_decode_YFSVolSync(const __be32 **_bp,
+                                 struct afs_volsync *volsync)
+{
+       struct yfs_xdr_YFSVolSync *x = (void *)*_bp;
+       u64 creation;
+
+       if (volsync) {
+               creation = xdr_to_u64(x->vol_creation_date);
+               do_div(creation, 10 * 1000 * 1000);
+               volsync->creation = creation;
+       }
+
+       *_bp += xdr_size(x);
+}
+
+/*
+ * Encode the requested attributes into a YFSStoreStatus block.  Only the
+ * attributes flagged in attr->ia_valid are filled in and enabled in the
+ * mask; everything else is sent as zero.  Returns the next free word.
+ */
+static __be32 *xdr_encode_YFS_StoreStatus(__be32 *bp, struct iattr *attr)
+{
+       struct yfs_xdr_YFSStoreStatus *x = (void *)bp;
+       s64 mtime = 0, owner = 0, group = 0;
+       u32 mask = 0, mode = 0;
+
+       if (attr->ia_valid & ATTR_MTIME) {
+               mask |= AFS_SET_MTIME;
+               mtime = linux_to_yfs_time(&attr->ia_mtime);
+       }
+
+       if (attr->ia_valid & ATTR_UID) {
+               mask |= AFS_SET_OWNER;
+               owner = from_kuid(&init_user_ns, attr->ia_uid);
+       }
+
+       if (attr->ia_valid & ATTR_GID) {
+               mask |= AFS_SET_GROUP;
+               group = from_kgid(&init_user_ns, attr->ia_gid);
+       }
+
+       if (attr->ia_valid & ATTR_MODE) {
+               mask |= AFS_SET_MODE;
+               mode = attr->ia_mode & S_IALLUGO;
+       }
+
+       x->mask         = htonl(mask);
+       x->mode         = htonl(mode);
+       x->mtime_client = u64_to_xdr(mtime);
+       x->owner        = u64_to_xdr(owner);
+       x->group        = u64_to_xdr(group);
+       return bp + xdr_size(x);
+}
+
+/*
+ * Decode a YFSFetchVolumeStatus block into an afs_volume_status, advancing
+ * *_bp past it.  Boolean volume states are unpacked from the flags word.
+ * min_quota is zeroed (not carried in this XDR block).
+ */
+static void xdr_decode_YFSFetchVolumeStatus(const __be32 **_bp,
+                                           struct afs_volume_status *vs)
+{
+       const struct yfs_xdr_YFSFetchVolumeStatus *x = (const void *)*_bp;
+       u32 flags;
+
+       vs->vid                 = xdr_to_u64(x->vid);
+       vs->parent_id           = xdr_to_u64(x->parent_id);
+       flags                   = ntohl(x->flags);
+       vs->online              = flags & yfs_FVSOnline;
+       vs->in_service          = flags & yfs_FVSInservice;
+       vs->blessed             = flags & yfs_FVSBlessed;
+       vs->needs_salvage       = flags & yfs_FVSNeedsSalvage;
+       vs->type                = ntohl(x->type);
+       vs->min_quota           = 0;
+       vs->max_quota           = xdr_to_u64(x->max_quota);
+       vs->blocks_in_use       = xdr_to_u64(x->blocks_in_use);
+       vs->part_blocks_avail   = xdr_to_u64(x->part_blocks_avail);
+       vs->part_max_blocks     = xdr_to_u64(x->part_max_blocks);
+       vs->vol_copy_date       = xdr_to_u64(x->vol_copy_date);
+       vs->vol_backup_date     = xdr_to_u64(x->vol_backup_date);
+       *_bp += xdr_size(x);
+}
+
+/*
+ * deliver reply data to an FS.FetchStatus
+ *
+ * The flat reply comprises a YFSFetchStatus (applied to the vnode in
+ * call->reply[0]), a YFSCallBack and a YFSVolSync (stored via call->reply[1]).
+ */
+static int yfs_deliver_fs_fetch_status_vnode(struct afs_call *call)
+{
+       struct afs_vnode *vnode = call->reply[0];
+       const __be32 *bp;
+       int ret;
+
+       ret = afs_transfer_reply(call);
+       if (ret < 0)
+               return ret;
+
+       _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
+
+       /* unmarshall the reply once we've received all of it */
+       bp = call->buffer;
+       ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
+       xdr_decode_YFSCallBack(call, vnode, &bp);
+       xdr_decode_YFSVolSync(&bp, call->reply[1]);
+
+       _leave(" = 0 [done]");
+       return 0;
+}
+
+/*
+ * YFS.FetchStatus operation type (vnode variant: the status is decoded
+ * straight into the target vnode).
+ */
+static const struct afs_call_type yfs_RXYFSFetchStatus_vnode = {
+       .name           = "YFS.FetchStatus(vnode)",
+       .op             = yfs_FS_FetchStatus,
+       .deliver        = yfs_deliver_fs_fetch_status_vnode,
+       .destructor     = afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the status information for a file.
+ *
+ * Marshals a YFSFETCHSTATUS request for fc->vnode and dispatches it through
+ * the cursor's address list.  @volsync, if not NULL, receives the volume sync
+ * info from the reply.  @new_inode selects an expected data version of 1
+ * (fresh inode) instead of the vnode's current version.
+ */
+int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync,
+                            bool new_inode)
+{
+       struct afs_vnode *vnode = fc->vnode;
+       struct afs_call *call;
+       struct afs_net *net = afs_v2net(vnode);
+       __be32 *bp;
+
+       _enter(",%x,{%llx:%llu},,",
+              key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
+
+       call = afs_alloc_flat_call(net, &yfs_RXYFSFetchStatus_vnode,
+                                  sizeof(__be32) * 2 +
+                                  sizeof(struct yfs_xdr_YFSFid),
+                                  sizeof(struct yfs_xdr_YFSFetchStatus) +
+                                  sizeof(struct yfs_xdr_YFSCallBack) +
+                                  sizeof(struct yfs_xdr_YFSVolSync));
+       if (!call) {
+               fc->ac.error = -ENOMEM;
+               return -ENOMEM;
+       }
+
+       call->key = fc->key;
+       call->reply[0] = vnode;
+       call->reply[1] = volsync;
+       call->expected_version = new_inode ? 1 : vnode->status.data_version;
+
+       /* marshall the parameters */
+       bp = call->request;
+       bp = xdr_encode_u32(bp, YFSFETCHSTATUS);
+       bp = xdr_encode_u32(bp, 0); /* RPC flags */
+       bp = xdr_encode_YFSFid(bp, &vnode->fid);
+       yfs_check_req(call, bp);
+
+       call->cb_break = fc->cb_break;
+       afs_use_fs_server(call, fc->cbi);
+       trace_afs_make_fs_call(call, &vnode->fid);
+       return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to an YFS.FetchData64.
+ *
+ * This is an incremental unmarshaller driven by call->unmarshall:
+ *   0 - init; 1 - read the 64-bit returned data length; 2 - copy data into
+ *   the request's pages one page at a time via call->bvec; 3 - discard any
+ *   excess the server sent beyond what was asked for; 4 - decode the
+ *   trailing status/callback/volsync metadata; 5 - done.
+ * It may be re-entered with a partial buffer, so each phase re-checks
+ * afs_extract_data() for -EAGAIN-style early return.
+ */
+static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
+{
+       struct afs_vnode *vnode = call->reply[0];
+       struct afs_read *req = call->reply[2];
+       const __be32 *bp;
+       unsigned int size;
+       int ret;
+
+       _enter("{%u,%zu/%llu}",
+              call->unmarshall, iov_iter_count(&call->iter), req->actual_len);
+
+       switch (call->unmarshall) {
+       case 0:
+               req->actual_len = 0;
+               req->index = 0;
+               req->offset = req->pos & (PAGE_SIZE - 1);
+               afs_extract_to_tmp64(call);
+               call->unmarshall++;
+
+               /* extract the returned data length */
+       case 1:
+               _debug("extract data length");
+               ret = afs_extract_data(call, true);
+               if (ret < 0)
+                       return ret;
+
+               req->actual_len = be64_to_cpu(call->tmp64);
+               _debug("DATA length: %llu", req->actual_len);
+               req->remain = min(req->len, req->actual_len);
+               if (req->remain == 0)
+                       goto no_more_data;
+
+               call->unmarshall++;
+
+       begin_page:
+               /* Point the iterator at the next target page segment. */
+               ASSERTCMP(req->index, <, req->nr_pages);
+               if (req->remain > PAGE_SIZE - req->offset)
+                       size = PAGE_SIZE - req->offset;
+               else
+                       size = req->remain;
+               call->bvec[0].bv_len = size;
+               call->bvec[0].bv_offset = req->offset;
+               call->bvec[0].bv_page = req->pages[req->index];
+               iov_iter_bvec(&call->iter, READ, call->bvec, 1, size);
+               ASSERTCMP(size, <=, PAGE_SIZE);
+
+               /* extract the returned data */
+       case 2:
+               _debug("extract data %zu/%llu",
+                      iov_iter_count(&call->iter), req->remain);
+
+               ret = afs_extract_data(call, true);
+               if (ret < 0)
+                       return ret;
+               req->remain -= call->bvec[0].bv_len;
+               req->offset += call->bvec[0].bv_len;
+               ASSERTCMP(req->offset, <=, PAGE_SIZE);
+               if (req->offset == PAGE_SIZE) {
+                       req->offset = 0;
+                       if (req->page_done)
+                               req->page_done(call, req);
+                       req->index++;
+                       if (req->remain > 0)
+                               goto begin_page;
+               }
+
+               ASSERTCMP(req->remain, ==, 0);
+               if (req->actual_len <= req->len)
+                       goto no_more_data;
+
+               /* Discard any excess data the server gave us */
+               iov_iter_discard(&call->iter, READ, req->actual_len - req->len);
+               call->unmarshall = 3;
+       case 3:
+               _debug("extract discard %zu/%llu",
+                      iov_iter_count(&call->iter), req->actual_len - req->len);
+
+               ret = afs_extract_data(call, true);
+               if (ret < 0)
+                       return ret;
+
+       no_more_data:
+               call->unmarshall = 4;
+               afs_extract_to_buf(call,
+                                  sizeof(struct yfs_xdr_YFSFetchStatus) +
+                                  sizeof(struct yfs_xdr_YFSCallBack) +
+                                  sizeof(struct yfs_xdr_YFSVolSync));
+
+               /* extract the metadata */
+       case 4:
+               ret = afs_extract_data(call, false);
+               if (ret < 0)
+                       return ret;
+
+               bp = call->buffer;
+               ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+                                       &vnode->status.data_version, req);
+               if (ret < 0)
+                       return ret;
+               xdr_decode_YFSCallBack(call, vnode, &bp);
+               xdr_decode_YFSVolSync(&bp, call->reply[1]);
+
+               call->unmarshall++;
+
+       case 5:
+               break;
+       }
+
+       /* The server may have returned less than asked for; zero-fill the
+        * remainder of the request's pages.
+        */
+       for (; req->index < req->nr_pages; req->index++) {
+               if (req->offset < PAGE_SIZE)
+                       zero_user_segment(req->pages[req->index],
+                                         req->offset, PAGE_SIZE);
+               if (req->page_done)
+                       req->page_done(call, req);
+               req->offset = 0;
+       }
+
+       _leave(" = 0 [done]");
+       return 0;
+}
+
+/* Release the read request pinned by the fetch call, then do flat-call cleanup. */
+static void yfs_fetch_data_destructor(struct afs_call *call)
+{
+       struct afs_read *req = call->reply[2];
+
+       afs_put_read(req);
+       afs_flat_call_destructor(call);
+}
+
+/*
+ * YFS.FetchData64 operation type.  Uses a dedicated destructor to drop the
+ * ref taken on the afs_read request.
+ */
+static const struct afs_call_type yfs_RXYFSFetchData64 = {
+       .name           = "YFS.FetchData64",
+       .op             = yfs_FS_FetchData64,
+       .deliver        = yfs_deliver_fs_fetch_data64,
+       .destructor     = yfs_fetch_data_destructor,
+};
+
+/*
+ * Fetch data from a file.
+ *
+ * Marshals a YFSFETCHDATA64 request for req->pos/req->len on fc->vnode and
+ * dispatches it.  Takes an extra ref on @req, released by the call destructor.
+ *
+ * NOTE(review): on allocation failure this returns -ENOMEM without setting
+ * fc->ac.error, unlike yfs_fs_fetch_file_status() — confirm which convention
+ * the cursor error handling expects.
+ */
+int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
+{
+       struct afs_vnode *vnode = fc->vnode;
+       struct afs_call *call;
+       struct afs_net *net = afs_v2net(vnode);
+       __be32 *bp;
+
+       _enter(",%x,{%llx:%llu},%llx,%llx",
+              key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode,
+              req->pos, req->len);
+
+       call = afs_alloc_flat_call(net, &yfs_RXYFSFetchData64,
+                                  sizeof(__be32) * 2 +
+                                  sizeof(struct yfs_xdr_YFSFid) +
+                                  sizeof(struct yfs_xdr_u64) * 2,
+                                  sizeof(struct yfs_xdr_YFSFetchStatus) +
+                                  sizeof(struct yfs_xdr_YFSCallBack) +
+                                  sizeof(struct yfs_xdr_YFSVolSync));
+       if (!call)
+               return -ENOMEM;
+
+       call->key = fc->key;
+       call->reply[0] = vnode;
+       call->reply[1] = NULL; /* volsync */
+       call->reply[2] = req;
+       call->expected_version = vnode->status.data_version;
+       call->want_reply_time = true;
+
+       /* marshall the parameters */
+       bp = call->request;
+       bp = xdr_encode_u32(bp, YFSFETCHDATA64);
+       bp = xdr_encode_u32(bp, 0); /* RPC flags */
+       bp = xdr_encode_YFSFid(bp, &vnode->fid);
+       bp = xdr_encode_u64(bp, req->pos);
+       bp = xdr_encode_u64(bp, req->len);
+       yfs_check_req(call, bp);
+
+       refcount_inc(&req->usage);
+       call->cb_break = fc->cb_break;
+       afs_use_fs_server(call, fc->cbi);
+       trace_afs_make_fs_call(call, &vnode->fid);
+       return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data for YFS.CreateFile or YFS.MakeDir.
+ *
+ * Decodes, in wire order: the new object's FID (-> reply[1]), the new
+ * object's status (-> reply[2]), the updated directory status (applied to
+ * the directory vnode in reply[0]), the callback promise (-> reply[3]) and
+ * the volume sync info (discarded).
+ */
+static int yfs_deliver_fs_create_vnode(struct afs_call *call)
+{
+       struct afs_vnode *vnode = call->reply[0];
+       const __be32 *bp;
+       int ret;
+
+       _enter("{%u}", call->unmarshall);
+
+       ret = afs_transfer_reply(call);
+       if (ret < 0)
+               return ret;
+
+       /* unmarshall the reply once we've received all of it */
+       bp = call->buffer;
+       xdr_decode_YFSFid(&bp, call->reply[1]);
+       ret = yfs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+       if (ret < 0)
+               return ret;
+       ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
+       xdr_decode_YFSCallBack_raw(&bp, call->reply[3]);
+       xdr_decode_YFSVolSync(&bp, NULL);
+
+       _leave(" = 0 [done]");
+       return 0;
+}
+
+/*
+ * YFS.CreateFile operation type.  The reply is decoded by the shared
+ * create-vnode deliverer, which also serves YFS.MakeDir.
+ *
+ * NOTE(review): the "afs_" prefix is inconsistent with the other yfs_RX*
+ * types in this file; renaming it would require updating the caller too.
+ */
+static const struct afs_call_type afs_RXFSCreateFile = {
+       .name           = "YFS.CreateFile",
+       .op             = yfs_FS_CreateFile,
+       .deliver        = yfs_deliver_fs_create_vnode,
+       .destructor     = afs_flat_call_destructor,
+};
+
+/*
+ * Create a file.
+ *
+ * fc:   Fileserver op cursor; supplies the parent directory vnode and key.
+ * name: Name of the file to create in the parent directory.
+ * mode: Mode bits for the new file.
+ * current_data_version: Parent dir's data version before the op; the reply
+ *       is expected to carry this value plus one.
+ * newfid/newstatus/newcb: Filled in from the reply with the new file's FID,
+ *       status and callback promise (via reply[1..3]).
+ *
+ * Returns the result of afs_make_call(), or -ENOMEM on allocation failure.
+ */
+int yfs_fs_create_file(struct afs_fs_cursor *fc,
+                      const char *name,
+                      umode_t mode,
+                      u64 current_data_version,
+                      struct afs_fid *newfid,
+                      struct afs_file_status *newstatus,
+                      struct afs_callback *newcb)
+{
+       struct afs_vnode *vnode = fc->vnode;
+       struct afs_call *call;
+       struct afs_net *net = afs_v2net(vnode);
+       size_t namesz, reqsz, rplsz;
+       __be32 *bp;
+
+       _enter("");
+
+       namesz = strlen(name);
+       /* Opcode + RPC flags, parent FID, name, store-status, lock type.
+        * Use yfs_xdr_RPCFlags for the flags slot for consistency with the
+        * other request-size computations in this file.
+        */
+       reqsz = (sizeof(__be32) +
+                sizeof(struct yfs_xdr_RPCFlags) +
+                sizeof(struct yfs_xdr_YFSFid) +
+                xdr_strlen(namesz) +
+                sizeof(struct yfs_xdr_YFSStoreStatus) +
+                sizeof(__be32)); /* ViceLockType */
+       rplsz = (sizeof(struct yfs_xdr_YFSFid) +
+                sizeof(struct yfs_xdr_YFSFetchStatus) +
+                sizeof(struct yfs_xdr_YFSFetchStatus) +
+                sizeof(struct yfs_xdr_YFSCallBack) +
+                sizeof(struct yfs_xdr_YFSVolSync));
+
+       call = afs_alloc_flat_call(net, &afs_RXFSCreateFile, reqsz, rplsz);
+       if (!call)
+               return -ENOMEM;
+
+       call->key = fc->key;
+       call->reply[0] = vnode;
+       call->reply[1] = newfid;
+       call->reply[2] = newstatus;
+       call->reply[3] = newcb;
+       call->expected_version = current_data_version + 1;
+
+       /* marshall the parameters */
+       bp = call->request;
+       bp = xdr_encode_u32(bp, YFSCREATEFILE);
+       bp = xdr_encode_u32(bp, 0); /* RPC flags */
+       bp = xdr_encode_YFSFid(bp, &vnode->fid);
+       bp = xdr_encode_string(bp, name, namesz);
+       bp = xdr_encode_YFSStoreStatus_mode(bp, mode);
+       bp = xdr_encode_u32(bp, 0); /* ViceLockType */
+       yfs_check_req(call, bp);
+
+       afs_use_fs_server(call, fc->cbi);
+       trace_afs_make_fs_call(call, &vnode->fid);
+       return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * YFS.MakeDir operation type (reply decoded by the shared create-vnode
+ * deliverer).
+ */
+static const struct afs_call_type yfs_RXFSMakeDir = {
+       .name           = "YFS.MakeDir",
+       .op             = yfs_FS_MakeDir,
+       .deliver        = yfs_deliver_fs_create_vnode,
+       .destructor     = afs_flat_call_destructor,
+};
+
+/*
+ * Make a directory.
+ *
+ * fc:   Fileserver op cursor; supplies the parent directory vnode and key.
+ * name: Name of the directory to create.
+ * mode: Mode bits for the new directory.
+ * current_data_version: Parent dir's data version before the op; the reply
+ *       is expected to carry this value plus one.
+ * newfid/newstatus/newcb: Filled in from the reply (via reply[1..3]).
+ *
+ * Returns the result of afs_make_call(), or -ENOMEM on allocation failure.
+ */
+int yfs_fs_make_dir(struct afs_fs_cursor *fc,
+                   const char *name,
+                   umode_t mode,
+                   u64 current_data_version,
+                   struct afs_fid *newfid,
+                   struct afs_file_status *newstatus,
+                   struct afs_callback *newcb)
+{
+       struct afs_vnode *vnode = fc->vnode;
+       struct afs_call *call;
+       struct afs_net *net = afs_v2net(vnode);
+       size_t namesz, reqsz, rplsz;
+       __be32 *bp;
+
+       _enter("");
+
+       namesz = strlen(name);
+       /* Opcode + RPC flags, parent FID, name and store-status. */
+       reqsz = (sizeof(__be32) +
+                sizeof(struct yfs_xdr_RPCFlags) +
+                sizeof(struct yfs_xdr_YFSFid) +
+                xdr_strlen(namesz) +
+                sizeof(struct yfs_xdr_YFSStoreStatus));
+       rplsz = (sizeof(struct yfs_xdr_YFSFid) +
+                sizeof(struct yfs_xdr_YFSFetchStatus) +
+                sizeof(struct yfs_xdr_YFSFetchStatus) +
+                sizeof(struct yfs_xdr_YFSCallBack) +
+                sizeof(struct yfs_xdr_YFSVolSync));
+
+       call = afs_alloc_flat_call(net, &yfs_RXFSMakeDir, reqsz, rplsz);
+       if (!call)
+               return -ENOMEM;
+
+       call->key = fc->key;
+       call->reply[0] = vnode;
+       call->reply[1] = newfid;
+       call->reply[2] = newstatus;
+       call->reply[3] = newcb;
+       call->expected_version = current_data_version + 1;
+
+       /* marshall the parameters */
+       bp = call->request;
+       bp = xdr_encode_u32(bp, YFSMAKEDIR);
+       bp = xdr_encode_u32(bp, 0); /* RPC flags */
+       bp = xdr_encode_YFSFid(bp, &vnode->fid);
+       bp = xdr_encode_string(bp, name, namesz);
+       bp = xdr_encode_YFSStoreStatus_mode(bp, mode);
+       yfs_check_req(call, bp);
+
+       afs_use_fs_server(call, fc->cbi);
+       trace_afs_make_fs_call(call, &vnode->fid);
+       return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.RemoveFile2 operation.
+ *
+ * Decodes the updated directory status (applied to reply[0]), the removed
+ * file's FID and post-op status (applied to reply[1]) and the volume sync
+ * info (discarded).
+ */
+static int yfs_deliver_fs_remove_file2(struct afs_call *call)
+{
+       struct afs_vnode *dvnode = call->reply[0];
+       struct afs_vnode *vnode = call->reply[1];
+       struct afs_fid fid;
+       const __be32 *bp;
+       int ret;
+
+       _enter("{%u}", call->unmarshall);
+
+       ret = afs_transfer_reply(call);
+       if (ret < 0)
+               return ret;
+
+       /* unmarshall the reply once we've received all of it */
+       bp = call->buffer;
+       ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
+
+       /* NOTE(review): vnode is dereferenced unconditionally here; assumes
+        * the caller always supplies a non-NULL reply[1] - TODO confirm.
+        */
+       xdr_decode_YFSFid(&bp, &fid);
+       ret = yfs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL);
+       if (ret < 0)
+               return ret;
+       /* Was deleted if vnode->status.abort_code == VNOVNODE. */
+
+       xdr_decode_YFSVolSync(&bp, NULL);
+       return 0;
+}
+
+/*
+ * YFS.RemoveFile2 operation type.  Unlike plain RemoveFile, the reply also
+ * carries the removed file's FID and post-op status.
+ */
+static const struct afs_call_type yfs_RXYFSRemoveFile2 = {
+       .name           = "YFS.RemoveFile2",
+       .op             = yfs_FS_RemoveFile2,
+       .deliver        = yfs_deliver_fs_remove_file2,
+       .destructor     = afs_flat_call_destructor,
+};
+
+/*
+ * Remove a file and retrieve new file status.
+ *
+ * fc:    Fileserver op cursor; supplies the parent directory vnode and key.
+ * vnode: The file being removed (its status is updated from the reply).
+ * name:  Name of the dirent to remove from the parent directory.
+ * current_data_version: Parent dir's data version before the op; the reply
+ *        is expected to carry this value plus one.
+ *
+ * Returns the result of afs_make_call(), or -ENOMEM on allocation failure.
+ */
+int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
+                       const char *name, u64 current_data_version)
+{
+       struct afs_vnode *dvnode = fc->vnode;
+       struct afs_call *call;
+       struct afs_net *net = afs_v2net(dvnode);
+       size_t namesz;
+       __be32 *bp;
+
+       _enter("");
+
+       namesz = strlen(name);
+
+       call = afs_alloc_flat_call(net, &yfs_RXYFSRemoveFile2,
+                                  sizeof(__be32) +
+                                  sizeof(struct yfs_xdr_RPCFlags) +
+                                  sizeof(struct yfs_xdr_YFSFid) +
+                                  xdr_strlen(namesz),
+                                  sizeof(struct yfs_xdr_YFSFetchStatus) +
+                                  sizeof(struct yfs_xdr_YFSFid) +
+                                  sizeof(struct yfs_xdr_YFSFetchStatus) +
+                                  sizeof(struct yfs_xdr_YFSVolSync));
+       if (!call)
+               return -ENOMEM;
+
+       call->key = fc->key;
+       call->reply[0] = dvnode;
+       call->reply[1] = vnode;
+       call->expected_version = current_data_version + 1;
+
+       /* marshall the parameters */
+       bp = call->request;
+       bp = xdr_encode_u32(bp, YFSREMOVEFILE2);
+       bp = xdr_encode_u32(bp, 0); /* RPC flags */
+       bp = xdr_encode_YFSFid(bp, &dvnode->fid);
+       bp = xdr_encode_string(bp, name, namesz);
+       yfs_check_req(call, bp);
+
+       afs_use_fs_server(call, fc->cbi);
+       trace_afs_make_fs_call(call, &dvnode->fid);
+       return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.RemoveFile or YFS.RemoveDir operation.
+ *
+ * Decodes the updated directory status (applied to reply[0]) and the volume
+ * sync info (discarded).
+ */
+static int yfs_deliver_fs_remove(struct afs_call *call)
+{
+       struct afs_vnode *dvnode = call->reply[0];
+       const __be32 *bp;
+       int ret;
+
+       _enter("{%u}", call->unmarshall);
+
+       ret = afs_transfer_reply(call);
+       if (ret < 0)
+               return ret;
+
+       /* unmarshall the reply once we've received all of it */
+       bp = call->buffer;
+       ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
+
+       xdr_decode_YFSVolSync(&bp, NULL);
+       return 0;
+}
+
+/*
+ * YFS.RemoveDir and YFS.RemoveFile operation types.
+ */
+static const struct afs_call_type yfs_RXYFSRemoveFile = {
+       .name           = "YFS.RemoveFile",
+       .op             = yfs_FS_RemoveFile,
+       .deliver        = yfs_deliver_fs_remove,
+       .destructor     = afs_flat_call_destructor,
+};
+
+static const struct afs_call_type yfs_RXYFSRemoveDir = {
+       .name           = "YFS.RemoveDir",
+       .op             = yfs_FS_RemoveDir,
+       .deliver        = yfs_deliver_fs_remove,
+       .destructor     = afs_flat_call_destructor,
+};
+
+/*
+ * Remove a file or directory.
+ *
+ * fc:    Fileserver op cursor; supplies the parent directory vnode and key.
+ * vnode: The object being removed (stashed in reply[1]).
+ * name:  Name of the dirent to remove.
+ * isdir: Selects YFS.RemoveDir vs YFS.RemoveFile.
+ * current_data_version: Parent dir's data version before the op; the reply
+ *        is expected to carry this value plus one.
+ *
+ * Returns the result of afs_make_call(), or -ENOMEM on allocation failure.
+ */
+int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
+                 const char *name, bool isdir, u64 current_data_version)
+{
+       struct afs_vnode *dvnode = fc->vnode;
+       struct afs_call *call;
+       struct afs_net *net = afs_v2net(dvnode);
+       size_t namesz;
+       __be32 *bp;
+
+       _enter("");
+
+       namesz = strlen(name);
+       call = afs_alloc_flat_call(
+               net, isdir ? &yfs_RXYFSRemoveDir : &yfs_RXYFSRemoveFile,
+               sizeof(__be32) +
+               sizeof(struct yfs_xdr_RPCFlags) +
+               sizeof(struct yfs_xdr_YFSFid) +
+               xdr_strlen(namesz),
+               sizeof(struct yfs_xdr_YFSFetchStatus) +
+               sizeof(struct yfs_xdr_YFSVolSync));
+       if (!call)
+               return -ENOMEM;
+
+       call->key = fc->key;
+       call->reply[0] = dvnode;
+       call->reply[1] = vnode;
+       call->expected_version = current_data_version + 1;
+
+       /* marshall the parameters */
+       bp = call->request;
+       bp = xdr_encode_u32(bp, isdir ? YFSREMOVEDIR : YFSREMOVEFILE);
+       bp = xdr_encode_u32(bp, 0); /* RPC flags */
+       bp = xdr_encode_YFSFid(bp, &dvnode->fid);
+       bp = xdr_encode_string(bp, name, namesz);
+       yfs_check_req(call, bp);
+
+       afs_use_fs_server(call, fc->cbi);
+       trace_afs_make_fs_call(call, &dvnode->fid);
+       return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.Link operation.
+ *
+ * Wire order: target file status first (applied to reply[1]), then the
+ * updated directory status (applied to reply[0]), then volume sync info
+ * (discarded).
+ */
+static int yfs_deliver_fs_link(struct afs_call *call)
+{
+       struct afs_vnode *dvnode = call->reply[0], *vnode = call->reply[1];
+       const __be32 *bp;
+       int ret;
+
+       _enter("{%u}", call->unmarshall);
+
+       ret = afs_transfer_reply(call);
+       if (ret < 0)
+               return ret;
+
+       /* unmarshall the reply once we've received all of it */
+       bp = call->buffer;
+       ret = yfs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL);
+       if (ret < 0)
+               return ret;
+       ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
+       xdr_decode_YFSVolSync(&bp, NULL);
+       _leave(" = 0 [done]");
+       return 0;
+}
+
+/*
+ * YFS.Link operation type (hard-link creation; see yfs_fs_link()).
+ */
+static const struct afs_call_type yfs_RXYFSLink = {
+       .name           = "YFS.Link",
+       .op             = yfs_FS_Link,
+       .deliver        = yfs_deliver_fs_link,
+       .destructor     = afs_flat_call_destructor,
+};
+
+/*
+ * Make a hard link.
+ *
+ * fc:    Fileserver op cursor; supplies the directory vnode and key.
+ * vnode: The existing file to link to.
+ * name:  Name of the new dirent.
+ * current_data_version: Dir's data version before the op; the reply is
+ *        expected to carry this value plus one.
+ *
+ * Returns the result of afs_make_call(), or -ENOMEM on allocation failure.
+ */
+int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
+               const char *name, u64 current_data_version)
+{
+       struct afs_vnode *dvnode = fc->vnode;
+       struct afs_call *call;
+       /* NOTE(review): net is derived from the target vnode here, whereas
+        * the other directory ops use the directory vnode; assumes both are
+        * in the same network namespace - TODO confirm.
+        */
+       struct afs_net *net = afs_v2net(vnode);
+       size_t namesz;
+       __be32 *bp;
+
+       _enter("");
+
+       namesz = strlen(name);
+       call = afs_alloc_flat_call(net, &yfs_RXYFSLink,
+                                  sizeof(__be32) +
+                                  sizeof(struct yfs_xdr_RPCFlags) +
+                                  sizeof(struct yfs_xdr_YFSFid) +
+                                  xdr_strlen(namesz) +
+                                  sizeof(struct yfs_xdr_YFSFid),
+                                  sizeof(struct yfs_xdr_YFSFetchStatus) +
+                                  sizeof(struct yfs_xdr_YFSFetchStatus) +
+                                  sizeof(struct yfs_xdr_YFSVolSync));
+       if (!call)
+               return -ENOMEM;
+
+       call->key = fc->key;
+       call->reply[0] = dvnode;
+       call->reply[1] = vnode;
+       call->expected_version = current_data_version + 1;
+
+       /* marshall the parameters */
+       bp = call->request;
+       bp = xdr_encode_u32(bp, YFSLINK);
+       bp = xdr_encode_u32(bp, 0); /* RPC flags */
+       bp = xdr_encode_YFSFid(bp, &dvnode->fid);
+       bp = xdr_encode_string(bp, name, namesz);
+       bp = xdr_encode_YFSFid(bp, &vnode->fid);
+       yfs_check_req(call, bp);
+
+       afs_use_fs_server(call, fc->cbi);
+       trace_afs_make_fs_call(call, &vnode->fid);
+       return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.Symlink operation.
+ *
+ * Decodes the new symlink's FID (-> reply[1]) and status (-> reply[2]),
+ * applies the updated directory status to the vnode in reply[0] and
+ * discards the volume sync info.
+ */
+static int yfs_deliver_fs_symlink(struct afs_call *call)
+{
+       struct afs_vnode *vnode = call->reply[0];
+       const __be32 *bp;
+       int ret;
+
+       _enter("{%u}", call->unmarshall);
+
+       ret = afs_transfer_reply(call);
+       if (ret < 0)
+               return ret;
+
+       /* unmarshall the reply once we've received all of it */
+       bp = call->buffer;
+       xdr_decode_YFSFid(&bp, call->reply[1]);
+       ret = yfs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+       if (ret < 0)
+               return ret;
+       ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
+       xdr_decode_YFSVolSync(&bp, NULL);
+
+       _leave(" = 0 [done]");
+       return 0;
+}
+
+/*
+ * YFS.Symlink operation type (see yfs_fs_symlink()).
+ */
+static const struct afs_call_type yfs_RXYFSSymlink = {
+       .name           = "YFS.Symlink",
+       .op             = yfs_FS_Symlink,
+       .deliver        = yfs_deliver_fs_symlink,
+       .destructor     = afs_flat_call_destructor,
+};
+
+/*
+ * Create a symbolic link.
+ *
+ * fc:       Fileserver op cursor; supplies the directory vnode and key.
+ * name:     Name of the new symlink dirent.
+ * contents: The link target string stored in the symlink.
+ * current_data_version: Dir's data version before the op; the reply is
+ *           expected to carry this value plus one.
+ * newfid/newstatus: Filled in from the reply (via reply[1..2]).
+ *
+ * Returns the result of afs_make_call(), or -ENOMEM on allocation failure.
+ */
+int yfs_fs_symlink(struct afs_fs_cursor *fc,
+                  const char *name,
+                  const char *contents,
+                  u64 current_data_version,
+                  struct afs_fid *newfid,
+                  struct afs_file_status *newstatus)
+{
+       struct afs_vnode *dvnode = fc->vnode;
+       struct afs_call *call;
+       struct afs_net *net = afs_v2net(dvnode);
+       size_t namesz, contents_sz;
+       __be32 *bp;
+
+       _enter("");
+
+       namesz = strlen(name);
+       contents_sz = strlen(contents);
+       call = afs_alloc_flat_call(net, &yfs_RXYFSSymlink,
+                                  sizeof(__be32) +
+                                  sizeof(struct yfs_xdr_RPCFlags) +
+                                  sizeof(struct yfs_xdr_YFSFid) +
+                                  xdr_strlen(namesz) +
+                                  xdr_strlen(contents_sz) +
+                                  sizeof(struct yfs_xdr_YFSStoreStatus),
+                                  sizeof(struct yfs_xdr_YFSFid) +
+                                  sizeof(struct yfs_xdr_YFSFetchStatus) +
+                                  sizeof(struct yfs_xdr_YFSFetchStatus) +
+                                  sizeof(struct yfs_xdr_YFSVolSync));
+       if (!call)
+               return -ENOMEM;
+
+       call->key = fc->key;
+       call->reply[0] = dvnode;
+       call->reply[1] = newfid;
+       call->reply[2] = newstatus;
+       call->expected_version = current_data_version + 1;
+
+       /* marshall the parameters */
+       bp = call->request;
+       bp = xdr_encode_u32(bp, YFSSYMLINK);
+       bp = xdr_encode_u32(bp, 0); /* RPC flags */
+       bp = xdr_encode_YFSFid(bp, &dvnode->fid);
+       bp = xdr_encode_string(bp, name, namesz);
+       bp = xdr_encode_string(bp, contents, contents_sz);
+       bp = xdr_encode_YFSStoreStatus_mode(bp, S_IRWXUGO); /* symlinks get full perms */
+       yfs_check_req(call, bp);
+
+       afs_use_fs_server(call, fc->cbi);
+       trace_afs_make_fs_call(call, &dvnode->fid);
+       return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.Rename operation.
+ *
+ * Applies the source directory status to reply[0]; for a cross-directory
+ * rename a second status record follows for the destination directory
+ * (reply[1], checked against expected_version_2).  Volume sync info is
+ * discarded.
+ */
+static int yfs_deliver_fs_rename(struct afs_call *call)
+{
+       struct afs_vnode *orig_dvnode = call->reply[0];
+       struct afs_vnode *new_dvnode = call->reply[1];
+       const __be32 *bp;
+       int ret;
+
+       _enter("{%u}", call->unmarshall);
+
+       ret = afs_transfer_reply(call);
+       if (ret < 0)
+               return ret;
+
+       /* unmarshall the reply once we've received all of it */
+       bp = call->buffer;
+       ret = yfs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
+       if (new_dvnode != orig_dvnode) {
+               ret = yfs_decode_status(call, &bp, &new_dvnode->status, new_dvnode,
+                                       &call->expected_version_2, NULL);
+               if (ret < 0)
+                       return ret;
+       }
+
+       xdr_decode_YFSVolSync(&bp, NULL);
+       _leave(" = 0 [done]");
+       return 0;
+}
+
+/*
+ * YFS.Rename operation type.
+ */
+static const struct afs_call_type yfs_RXYFSRename = {
+       /* Name the YFS op correctly ("FS.Rename" would mislabel traces and
+        * be inconsistent with every other YFS.* type in this file).
+        */
+       .name           = "YFS.Rename",
+       .op             = yfs_FS_Rename,
+       .deliver        = yfs_deliver_fs_rename,
+       .destructor     = afs_flat_call_destructor,
+};
+
+/*
+ * Rename a file or directory.
+ *
+ * fc:         Fileserver op cursor; supplies the source directory and key.
+ * orig_name:  Name of the dirent in the source directory.
+ * new_dvnode: Destination directory (may equal the source directory).
+ * new_name:   Name of the dirent in the destination directory.
+ * current_orig_data_version / current_new_data_version: Pre-op data
+ *             versions of the two directories; the reply is expected to
+ *             carry each value plus one.
+ *
+ * Returns the result of afs_make_call(), or -ENOMEM on allocation failure.
+ */
+int yfs_fs_rename(struct afs_fs_cursor *fc,
+                 const char *orig_name,
+                 struct afs_vnode *new_dvnode,
+                 const char *new_name,
+                 u64 current_orig_data_version,
+                 u64 current_new_data_version)
+{
+       struct afs_vnode *orig_dvnode = fc->vnode;
+       struct afs_call *call;
+       struct afs_net *net = afs_v2net(orig_dvnode);
+       size_t o_namesz, n_namesz;
+       __be32 *bp;
+
+       _enter("");
+
+       o_namesz = strlen(orig_name);
+       n_namesz = strlen(new_name);
+       call = afs_alloc_flat_call(net, &yfs_RXYFSRename,
+                                  sizeof(__be32) +
+                                  sizeof(struct yfs_xdr_RPCFlags) +
+                                  sizeof(struct yfs_xdr_YFSFid) +
+                                  xdr_strlen(o_namesz) +
+                                  sizeof(struct yfs_xdr_YFSFid) +
+                                  xdr_strlen(n_namesz),
+                                  sizeof(struct yfs_xdr_YFSFetchStatus) +
+                                  sizeof(struct yfs_xdr_YFSFetchStatus) +
+                                  sizeof(struct yfs_xdr_YFSVolSync));
+       if (!call)
+               return -ENOMEM;
+
+       call->key = fc->key;
+       call->reply[0] = orig_dvnode;
+       call->reply[1] = new_dvnode;
+       call->expected_version = current_orig_data_version + 1;
+       call->expected_version_2 = current_new_data_version + 1;
+
+       /* marshall the parameters */
+       bp = call->request;
+       bp = xdr_encode_u32(bp, YFSRENAME);
+       bp = xdr_encode_u32(bp, 0); /* RPC flags */
+       bp = xdr_encode_YFSFid(bp, &orig_dvnode->fid);
+       bp = xdr_encode_string(bp, orig_name, o_namesz);
+       bp = xdr_encode_YFSFid(bp, &new_dvnode->fid);
+       bp = xdr_encode_string(bp, new_name, n_namesz);
+       yfs_check_req(call, bp);
+
+       afs_use_fs_server(call, fc->cbi);
+       trace_afs_make_fs_call(call, &orig_dvnode->fid);
+       return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.StoreData64 operation.
+ *
+ * Applies the returned file status to the vnode in reply[0], discards the
+ * volume sync info, then marks the written-back pages as clean.
+ */
+static int yfs_deliver_fs_store_data(struct afs_call *call)
+{
+       struct afs_vnode *vnode = call->reply[0];
+       const __be32 *bp;
+       int ret;
+
+       _enter("");
+
+       ret = afs_transfer_reply(call);
+       if (ret < 0)
+               return ret;
+
+       /* unmarshall the reply once we've received all of it */
+       bp = call->buffer;
+       ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
+       xdr_decode_YFSVolSync(&bp, NULL);
+
+       afs_pages_written_back(vnode, call);
+
+       _leave(" = 0 [done]");
+       return 0;
+}
+
+/*
+ * YFS.StoreData64 operation type (page write-back; see yfs_fs_store_data()).
+ */
+static const struct afs_call_type yfs_RXYFSStoreData64 = {
+       .name           = "YFS.StoreData64",
+       .op             = yfs_FS_StoreData64,
+       .deliver        = yfs_deliver_fs_store_data,
+       .destructor     = afs_flat_call_destructor,
+};
+
+/*
+ * Store a set of pages to a large file.
+ *
+ * fc:      Fileserver op cursor; supplies the target vnode and key.
+ * mapping: Address space the pages belong to.
+ * first/last:  Inclusive page-index range to store.
+ * offset:  Byte offset of the data within the first page.
+ * to:      Byte offset of the end of the data within the last page.
+ *
+ * Computes the byte position/size from the page range, extends the
+ * transmitted file length if the write goes past the current i_size, and
+ * dispatches a YFS.StoreData64 RPC with send_pages set so the page data is
+ * streamed after the flat request.  Returns the result of afs_make_call(),
+ * or -ENOMEM on allocation failure.
+ */
+int yfs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
+                     pgoff_t first, pgoff_t last,
+                     unsigned offset, unsigned to)
+{
+       struct afs_vnode *vnode = fc->vnode;
+       struct afs_call *call;
+       struct afs_net *net = afs_v2net(vnode);
+       loff_t size, pos, i_size;
+       __be32 *bp;
+
+       _enter(",%x,{%llx:%llu},,",
+              key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
+
+       /* Bytes in the partial first/last pages plus any whole pages between. */
+       size = (loff_t)to - (loff_t)offset;
+       if (first != last)
+               size += (loff_t)(last - first) << PAGE_SHIFT;
+       pos = (loff_t)first << PAGE_SHIFT;
+       pos += offset;
+
+       i_size = i_size_read(&vnode->vfs_inode);
+       if (pos + size > i_size)
+               i_size = size + pos; /* the write extends the file */
+
+       _debug("size %llx, at %llx, i_size %llx",
+              (unsigned long long)size, (unsigned long long)pos,
+              (unsigned long long)i_size);
+
+       call = afs_alloc_flat_call(net, &yfs_RXYFSStoreData64,
+                                  sizeof(__be32) +
+                                  sizeof(__be32) +
+                                  sizeof(struct yfs_xdr_YFSFid) +
+                                  sizeof(struct yfs_xdr_YFSStoreStatus) +
+                                  sizeof(struct yfs_xdr_u64) * 3,
+                                  sizeof(struct yfs_xdr_YFSFetchStatus) +
+                                  sizeof(struct yfs_xdr_YFSVolSync));
+       if (!call)
+               return -ENOMEM;
+
+       call->key = fc->key;
+       call->mapping = mapping;
+       call->reply[0] = vnode;
+       call->first = first;
+       call->last = last;
+       call->first_offset = offset;
+       call->last_to = to;
+       call->send_pages = true; /* stream the page data after the flat request */
+       call->expected_version = vnode->status.data_version + 1;
+
+       /* marshall the parameters */
+       bp = call->request;
+       bp = xdr_encode_u32(bp, YFSSTOREDATA64);
+       bp = xdr_encode_u32(bp, 0); /* RPC flags */
+       bp = xdr_encode_YFSFid(bp, &vnode->fid);
+       bp = xdr_encode_YFSStoreStatus_mtime(bp, &vnode->vfs_inode.i_mtime);
+       bp = xdr_encode_u64(bp, pos);
+       bp = xdr_encode_u64(bp, size);
+       bp = xdr_encode_u64(bp, i_size);
+       yfs_check_req(call, bp);
+
+       afs_use_fs_server(call, fc->cbi);
+       trace_afs_make_fs_call(call, &vnode->fid);
+       return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.StoreStatus operation (also used for
+ * YFS.StoreData64 when it is issued purely to change the file size).
+ *
+ * Applies the returned file status to the vnode in reply[0] and discards
+ * the volume sync info.
+ */
+static int yfs_deliver_fs_store_status(struct afs_call *call)
+{
+       struct afs_vnode *vnode = call->reply[0];
+       const __be32 *bp;
+       int ret;
+
+       _enter("");
+
+       ret = afs_transfer_reply(call);
+       if (ret < 0)
+               return ret;
+
+       /* unmarshall the reply once we've received all of it */
+       bp = call->buffer;
+       ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
+       xdr_decode_YFSVolSync(&bp, NULL);
+
+       _leave(" = 0 [done]");
+       return 0;
+}
+
+/*
+ * YFS.StoreStatus operation type, plus a StoreData64 variant that shares
+ * the status-only deliverer (used by yfs_fs_setattr_size(), where no page
+ * data accompanies the call).
+ */
+static const struct afs_call_type yfs_RXYFSStoreStatus = {
+       .name           = "YFS.StoreStatus",
+       .op             = yfs_FS_StoreStatus,
+       .deliver        = yfs_deliver_fs_store_status,
+       .destructor     = afs_flat_call_destructor,
+};
+
+static const struct afs_call_type yfs_RXYFSStoreData64_as_Status = {
+       .name           = "YFS.StoreData64",
+       .op             = yfs_FS_StoreData64,
+       .deliver        = yfs_deliver_fs_store_status,
+       .destructor     = afs_flat_call_destructor,
+};
+
+/*
+ * Set the attributes on a file, using YFS.StoreData64 rather than
+ * YFS.StoreStatus so as to alter the file size also.
+ *
+ * A zero-length write at position 0 is encoded; only the trailing "new
+ * file length" field (attr->ia_size) has any effect on the data.
+ *
+ * Returns the result of afs_make_call(), or -ENOMEM on allocation failure.
+ */
+static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
+{
+       struct afs_vnode *vnode = fc->vnode;
+       struct afs_call *call;
+       struct afs_net *net = afs_v2net(vnode);
+       __be32 *bp;
+
+       _enter(",%x,{%llx:%llu},,",
+              key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
+
+       call = afs_alloc_flat_call(net, &yfs_RXYFSStoreData64_as_Status,
+                                  sizeof(__be32) * 2 +
+                                  sizeof(struct yfs_xdr_YFSFid) +
+                                  sizeof(struct yfs_xdr_YFSStoreStatus) +
+                                  sizeof(struct yfs_xdr_u64) * 3,
+                                  sizeof(struct yfs_xdr_YFSFetchStatus) +
+                                  sizeof(struct yfs_xdr_YFSVolSync));
+       if (!call)
+               return -ENOMEM;
+
+       call->key = fc->key;
+       call->reply[0] = vnode;
+       call->expected_version = vnode->status.data_version + 1;
+
+       /* marshall the parameters */
+       bp = call->request;
+       bp = xdr_encode_u32(bp, YFSSTOREDATA64);
+       bp = xdr_encode_u32(bp, 0); /* RPC flags */
+       bp = xdr_encode_YFSFid(bp, &vnode->fid);
+       bp = xdr_encode_YFS_StoreStatus(bp, attr);
+       bp = xdr_encode_u64(bp, 0);             /* position of start of write */
+       bp = xdr_encode_u64(bp, 0);             /* size of write */
+       bp = xdr_encode_u64(bp, attr->ia_size); /* new file length */
+       yfs_check_req(call, bp);
+
+       afs_use_fs_server(call, fc->cbi);
+       trace_afs_make_fs_call(call, &vnode->fid);
+       return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Set the attributes on a file, using YFS.StoreData64 if there's a change in
+ * file size, and YFS.StoreStatus otherwise.
+ *
+ * Note that the StoreStatus path expects the data version to be unchanged
+ * (no "+ 1"), since no file data is modified.
+ *
+ * Returns the result of afs_make_call(), or -ENOMEM on allocation failure.
+ */
+int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
+{
+       struct afs_vnode *vnode = fc->vnode;
+       struct afs_call *call;
+       struct afs_net *net = afs_v2net(vnode);
+       __be32 *bp;
+
+       if (attr->ia_valid & ATTR_SIZE)
+               return yfs_fs_setattr_size(fc, attr);
+
+       _enter(",%x,{%llx:%llu},,",
+              key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
+
+       call = afs_alloc_flat_call(net, &yfs_RXYFSStoreStatus,
+                                  sizeof(__be32) * 2 +
+                                  sizeof(struct yfs_xdr_YFSFid) +
+                                  sizeof(struct yfs_xdr_YFSStoreStatus),
+                                  sizeof(struct yfs_xdr_YFSFetchStatus) +
+                                  sizeof(struct yfs_xdr_YFSVolSync));
+       if (!call)
+               return -ENOMEM;
+
+       call->key = fc->key;
+       call->reply[0] = vnode;
+       call->expected_version = vnode->status.data_version;
+
+       /* marshall the parameters */
+       bp = call->request;
+       bp = xdr_encode_u32(bp, YFSSTORESTATUS);
+       bp = xdr_encode_u32(bp, 0); /* RPC flags */
+       bp = xdr_encode_YFSFid(bp, &vnode->fid);
+       bp = xdr_encode_YFS_StoreStatus(bp, attr);
+       yfs_check_req(call, bp);
+
+       afs_use_fs_server(call, fc->cbi);
+       trace_afs_make_fs_call(call, &vnode->fid);
+       return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.GetVolumeStatus operation.
+ *
+ * Incremental unmarshaller driven by call->unmarshall; each case falls
+ * through to the next once its chunk has been set up.  The wire format is:
+ * volume status record, then three length-prefixed, 4-byte-padded strings
+ * (volume name, offline message, message of the day), all decoded into the
+ * single scratch buffer at reply[2].
+ *
+ * NOTE(review): p[call->count] = 0 assumes the reply[2] buffer is at least
+ * AFSNAMEMAX bytes plus a NUL - confirm against the allocation in the
+ * caller (not visible here).
+ */
+static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
+{
+       const __be32 *bp;
+       char *p;
+       u32 size;
+       int ret;
+
+       _enter("{%u}", call->unmarshall);
+
+       switch (call->unmarshall) {
+       case 0:
+               call->unmarshall++;
+               afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchVolumeStatus));
+
+               /* Fall through - extract the returned status record */
+       case 1:
+               _debug("extract status");
+               ret = afs_extract_data(call, true);
+               if (ret < 0)
+                       return ret;
+
+               bp = call->buffer;
+               xdr_decode_YFSFetchVolumeStatus(&bp, call->reply[1]);
+               call->unmarshall++;
+               afs_extract_to_tmp(call);
+
+               /* Fall through - extract the volume name length */
+       case 2:
+               ret = afs_extract_data(call, true);
+               if (ret < 0)
+                       return ret;
+
+               call->count = ntohl(call->tmp);
+               _debug("volname length: %u", call->count);
+               if (call->count >= AFSNAMEMAX)
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_volname_len);
+               size = (call->count + 3) & ~3; /* It's padded */
+               afs_extract_begin(call, call->reply[2], size);
+               call->unmarshall++;
+
+               /* Fall through - extract the volume name */
+       case 3:
+               _debug("extract volname");
+               ret = afs_extract_data(call, true);
+               if (ret < 0)
+                       return ret;
+
+               p = call->reply[2];
+               p[call->count] = 0;
+               _debug("volname '%s'", p);
+               afs_extract_to_tmp(call);
+               call->unmarshall++;
+
+               /* Fall through - extract the offline message length */
+       case 4:
+               ret = afs_extract_data(call, true);
+               if (ret < 0)
+                       return ret;
+
+               call->count = ntohl(call->tmp);
+               _debug("offline msg length: %u", call->count);
+               if (call->count >= AFSNAMEMAX)
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_offline_msg_len);
+               size = (call->count + 3) & ~3; /* It's padded */
+               afs_extract_begin(call, call->reply[2], size);
+               call->unmarshall++;
+
+               /* Fall through - extract the offline message */
+       case 5:
+               _debug("extract offline");
+               ret = afs_extract_data(call, true);
+               if (ret < 0)
+                       return ret;
+
+               p = call->reply[2];
+               p[call->count] = 0;
+               _debug("offline '%s'", p);
+
+               afs_extract_to_tmp(call);
+               call->unmarshall++;
+
+               /* Fall through - extract the message of the day length */
+       case 6:
+               ret = afs_extract_data(call, true);
+               if (ret < 0)
+                       return ret;
+
+               call->count = ntohl(call->tmp);
+               _debug("motd length: %u", call->count);
+               if (call->count >= AFSNAMEMAX)
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_motd_len);
+               size = (call->count + 3) & ~3; /* It's padded */
+               afs_extract_begin(call, call->reply[2], size);
+               call->unmarshall++;
+
+               /* Fall through - extract the message of the day */
+       case 7:
+               _debug("extract motd");
+               ret = afs_extract_data(call, false);
+               if (ret < 0)
+                       return ret;
+
+               p = call->reply[2];
+               p[call->count] = 0;
+               _debug("motd '%s'", p);
+
+               call->unmarshall++;
+
+               /* Fall through */
+       case 8:
+               break;
+       }
+
+       _leave(" = 0 [done]");
+       return 0;
+}
+
+/*
+ * Destroy a YFS.GetVolumeStatus call.
+ *
+ * Free the temporary AFSOPAQUEMAX-sized scratch buffer that was attached to
+ * call->reply[2] for unmarshalling the volume name, offline message and MOTD
+ * strings, then hand off to the generic flat-call destructor.
+ */
+static void yfs_get_volume_status_call_destructor(struct afs_call *call)
+{
+	kfree(call->reply[2]);
+	call->reply[2] = NULL;
+	afs_flat_call_destructor(call);
+}
+
+/*
+ * YFS.GetVolumeStatus operation type.
+ *
+ * Uses a dedicated destructor so that the temporary string buffer allocated
+ * by yfs_fs_get_volume_status() is released along with the call.
+ */
+static const struct afs_call_type yfs_RXYFSGetVolumeStatus = {
+	.name		= "YFS.GetVolumeStatus",
+	.op		= yfs_FS_GetVolumeStatus,
+	.deliver	= yfs_deliver_fs_get_volume_status,
+	.destructor	= yfs_get_volume_status_call_destructor,
+};
+
+/*
+ * Fetch the status of a volume.
+ *
+ * A temporary AFSOPAQUEMAX-sized buffer is attached to the call as reply[2]
+ * for the deliver routine to unmarshal the variable-length volume name,
+ * offline message and MOTD strings into; it is freed by
+ * yfs_get_volume_status_call_destructor().
+ */
+int yfs_fs_get_volume_status(struct afs_fs_cursor *fc,
+			     struct afs_volume_status *vs)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	__be32 *bp;
+	void *tmpbuf;
+
+	_enter("");
+
+	/* Allocate the string scratch buffer before the call so that failure
+	 * can be handled without a partially-constructed call.
+	 */
+	tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL);
+	if (!tmpbuf)
+		return -ENOMEM;
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSGetVolumeStatus,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_u64),
+				   sizeof(struct yfs_xdr_YFSFetchVolumeStatus) +
+				   sizeof(__be32));
+	if (!call) {
+		kfree(tmpbuf);
+		return -ENOMEM;
+	}
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+	call->reply[1] = vs;
+	call->reply[2] = tmpbuf;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSGETVOLUMESTATUS);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_u64(bp, vnode->fid.vid);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.SetLock, YFS.ExtendLock or YFS.ReleaseLock
+ * call.
+ *
+ * All three operations return the same fixed-size reply - a YFSFetchStatus
+ * for the target vnode followed by a YFSVolSync - so the whole thing can be
+ * pulled in with one afs_transfer_reply() and decoded in place.
+ */
+static int yfs_deliver_fs_xxxx_lock(struct afs_call *call)
+{
+	struct afs_vnode *vnode = call->reply[0];
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+	xdr_decode_YFSVolSync(&bp, NULL);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.SetLock operation type.  Shares its deliver routine with
+ * YFS.ExtendLock and YFS.ReleaseLock as all three return the same reply.
+ */
+static const struct afs_call_type yfs_RXYFSSetLock = {
+	.name		= "YFS.SetLock",
+	.op		= yfs_FS_SetLock,
+	.deliver	= yfs_deliver_fs_xxxx_lock,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * YFS.ExtendLock operation type.  Shares its deliver routine with
+ * YFS.SetLock and YFS.ReleaseLock as all three return the same reply.
+ */
+static const struct afs_call_type yfs_RXYFSExtendLock = {
+	.name		= "YFS.ExtendLock",
+	.op		= yfs_FS_ExtendLock,
+	.deliver	= yfs_deliver_fs_xxxx_lock,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * YFS.ReleaseLock operation type.  Shares its deliver routine with
+ * YFS.SetLock and YFS.ExtendLock as all three return the same reply.
+ */
+static const struct afs_call_type yfs_RXYFSReleaseLock = {
+	.name		= "YFS.ReleaseLock",
+	.op		= yfs_FS_ReleaseLock,
+	.deliver	= yfs_deliver_fs_xxxx_lock,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Set a lock on a file.
+ *
+ * The request carries the target file ID and the requested lock @type; the
+ * reply (handled by yfs_deliver_fs_xxxx_lock()) carries updated file status
+ * and a volume sync record.
+ */
+int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSSetLock,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(__be32),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSSETLOCK);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	bp = xdr_encode_u32(bp, type);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Extend a lock on a file.
+ *
+ * Unlike yfs_fs_set_lock(), no lock type is marshalled - only the file ID
+ * is sent.
+ */
+int yfs_fs_extend_lock(struct afs_fs_cursor *fc)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSExtendLock,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSEXTENDLOCK);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Release a lock on a file.
+ *
+ * Unlike yfs_fs_set_lock(), no lock type is marshalled - only the file ID
+ * is sent.
+ */
+int yfs_fs_release_lock(struct afs_fs_cursor *fc)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSReleaseLock,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSRELEASELOCK);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.FetchStatus call made without a vnode: the
+ * decoded file status, callback and volsync records are written to the
+ * caller-supplied buffers in reply[1..3] rather than being applied to an
+ * inode.
+ */
+static int yfs_deliver_fs_fetch_status(struct afs_call *call)
+{
+	struct afs_file_status *status = call->reply[1];
+	struct afs_callback *callback = call->reply[2];
+	struct afs_volsync *volsync = call->reply[3];
+	struct afs_vnode *vnode = call->reply[0];
+	const __be32 *bp;
+	int ret;
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* reply[0] is NULL for this call type (see yfs_fs_fetch_status()), so
+	 * the trace must not dereference vnode - doing so would oops whenever
+	 * AFS debug tracing is enabled.
+	 */
+	_enter("{%u}", call->unmarshall);
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	ret = yfs_decode_status(call, &bp, status, vnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+	xdr_decode_YFSCallBack_raw(&bp, callback);
+	xdr_decode_YFSVolSync(&bp, volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.FetchStatus operation type (vnode-less variant: results are delivered
+ * to caller-supplied buffers).
+ */
+static const struct afs_call_type yfs_RXYFSFetchStatus = {
+	.name		= "YFS.FetchStatus",
+	.op		= yfs_FS_FetchStatus,
+	.deliver	= yfs_deliver_fs_fetch_status,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the status information for a fid without needing a vnode handle.
+ *
+ * The decoded records are written to the caller-supplied @status, @callback
+ * and @volsync buffers.  On allocation failure the error is also recorded in
+ * fc->ac.error.
+ */
+int yfs_fs_fetch_status(struct afs_fs_cursor *fc,
+			struct afs_net *net,
+			struct afs_fid *fid,
+			struct afs_file_status *status,
+			struct afs_callback *callback,
+			struct afs_volsync *volsync)
+{
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(fc->key), fid->vid, fid->vnode);
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSFetchStatus,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSCallBack) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call) {
+		fc->ac.error = -ENOMEM;
+		return -ENOMEM;
+	}
+
+	call->key = fc->key;
+	call->reply[0] = NULL; /* vnode for fid[0] */
+	call->reply[1] = status;
+	call->reply[2] = callback;
+	call->reply[3] = volsync;
+	call->expected_version = 1; /* vnode->status.data_version */
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSFETCHSTATUS);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, fid);
+	yfs_check_req(call, bp);
+
+	call->cb_break = fc->cb_break;
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to an YFS.InlineBulkStatus call
+ *
+ * The reply carries a counted array of file status records, a counted array
+ * of callback records and a volume sync record.  These are unmarshalled
+ * piecemeal by a fall-through state machine keyed on call->unmarshall,
+ * returning to the caller whenever afs_extract_data() cannot yet complete.
+ */
+static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
+{
+	struct afs_file_status *statuses;
+	struct afs_callback *callbacks;
+	struct afs_vnode *vnode = call->reply[0];
+	const __be32 *bp;
+	u32 tmp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	switch (call->unmarshall) {
+	case 0:
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+
+		/* Fall through */
+		/* Extract the file status count and array in two steps */
+	case 1:
+		_debug("extract status count");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		tmp = ntohl(call->tmp);
+		_debug("status count: %u/%u", tmp, call->count2);
+		/* The server must return exactly as many statuses as fids
+		 * were sent (call->count2).
+		 */
+		if (tmp != call->count2)
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_ibulkst_count);
+
+		call->count = 0;
+		call->unmarshall++;
+	more_counts:
+		afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchStatus));
+
+		/* Fall through */
+	case 2:
+		_debug("extract status array %u", call->count);
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		/* Only the first status (the one for the primary fid) may be
+		 * applied to the vnode, if there is one.
+		 */
+		bp = call->buffer;
+		statuses = call->reply[1];
+		ret = yfs_decode_status(call, &bp, &statuses[call->count],
+					call->count == 0 ? vnode : NULL,
+					NULL, NULL);
+		if (ret < 0)
+			return ret;
+
+		call->count++;
+		if (call->count < call->count2)
+			goto more_counts;
+
+		call->count = 0;
+		call->unmarshall++;
+		afs_extract_to_tmp(call);
+
+		/* Fall through */
+		/* Extract the callback count and array in two steps */
+	case 3:
+		_debug("extract CB count");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		tmp = ntohl(call->tmp);
+		_debug("CB count: %u", tmp);
+		if (tmp != call->count2)
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_ibulkst_cb_count);
+		call->count = 0;
+		call->unmarshall++;
+	more_cbs:
+		afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSCallBack));
+
+		/* Fall through */
+	case 4:
+		_debug("extract CB array");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		_debug("unmarshall CB array");
+		bp = call->buffer;
+		callbacks = call->reply[2];
+		xdr_decode_YFSCallBack_raw(&bp, &callbacks[call->count]);
+		statuses = call->reply[1];
+		/* The first entry's callback is also decoded against the
+		 * vnode, if any, provided its status came back clean.
+		 */
+		if (call->count == 0 && vnode && statuses[0].abort_code == 0) {
+			bp = call->buffer;
+			xdr_decode_YFSCallBack(call, vnode, &bp);
+		}
+		call->count++;
+		if (call->count < call->count2)
+			goto more_cbs;
+
+		afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSVolSync));
+		call->unmarshall++;
+
+		/* Fall through */
+		/* Extract the volume sync record */
+	case 5:
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+		xdr_decode_YFSVolSync(&bp, call->reply[3]);
+
+		call->unmarshall++;
+
+		/* Fall through */
+	case 6:
+		break;
+	}
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.InlineBulkStatus operation type.
+ */
+static const struct afs_call_type yfs_RXYFSInlineBulkStatus = {
+	.name		= "YFS.InlineBulkStatus",
+	.op		= yfs_FS_InlineBulkStatus,
+	.deliver	= yfs_deliver_fs_inline_bulk_status,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the status information for up to 1024 files.
+ *
+ * @fids points to an array of @nr_fids file IDs; the corresponding status
+ * and callback records are delivered into @statuses and @callbacks, which
+ * must each have at least @nr_fids elements.
+ */
+int yfs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
+			      struct afs_net *net,
+			      struct afs_fid *fids,
+			      struct afs_file_status *statuses,
+			      struct afs_callback *callbacks,
+			      unsigned int nr_fids,
+			      struct afs_volsync *volsync)
+{
+	struct afs_call *call;
+	__be32 *bp;
+	int i;
+
+	/* Trace the first fid only; using fids[0] for both fields avoids
+	 * reading past the array when nr_fids == 1.
+	 */
+	_enter(",%x,{%llx:%llu},%u",
+	       key_serial(fc->key), fids[0].vid, fids[0].vnode, nr_fids);
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSInlineBulkStatus,
+				   sizeof(__be32) +
+				   sizeof(__be32) +
+				   sizeof(__be32) +
+				   sizeof(struct yfs_xdr_YFSFid) * nr_fids,
+				   sizeof(struct yfs_xdr_YFSFetchStatus));
+	if (!call) {
+		fc->ac.error = -ENOMEM;
+		return -ENOMEM;
+	}
+
+	call->key = fc->key;
+	call->reply[0] = NULL; /* vnode for fid[0] */
+	call->reply[1] = statuses;
+	call->reply[2] = callbacks;
+	call->reply[3] = volsync;
+	call->count2 = nr_fids;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSINLINEBULKSTATUS);
+	bp = xdr_encode_u32(bp, 0); /* RPCFlags */
+	bp = xdr_encode_u32(bp, nr_fids);
+	for (i = 0; i < nr_fids; i++)
+		bp = xdr_encode_YFSFid(bp, &fids[i]);
+	yfs_check_req(call, bp);
+
+	call->cb_break = fc->cb_break;
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &fids[0]);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
index 9a69392f1fb375c4c16b07332248edf7da57c9b6..d81c148682e715a9f0ed4937185dd3b5538a9603 100644 (file)
@@ -350,7 +350,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 
        s->s_magic = BFS_MAGIC;
 
-       if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) {
+       if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end) ||
+           le32_to_cpu(bfs_sb->s_start) < BFS_BSIZE) {
                printf("Superblock is corrupted\n");
                goto out1;
        }
@@ -359,9 +360,11 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
                                        sizeof(struct bfs_inode)
                                        + BFS_ROOT_INO - 1;
        imap_len = (info->si_lasti / 8) + 1;
-       info->si_imap = kzalloc(imap_len, GFP_KERNEL);
-       if (!info->si_imap)
+       info->si_imap = kzalloc(imap_len, GFP_KERNEL | __GFP_NOWARN);
+       if (!info->si_imap) {
+               printf("Cannot allocate %u bytes\n", imap_len);
                goto out1;
+       }
        for (i = 0; i < BFS_ROOT_INO; i++)
                set_bit(i, info->si_imap);
 
index 38b8ce05cbc7e693d5d4b835eb586c586680ef0c..a80b4f0ee7c4f172d19b4df1c8c330e4c0738557 100644 (file)
@@ -349,7 +349,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 
        dio->size = 0;
        dio->multi_bio = false;
-       dio->should_dirty = is_read && (iter->type == ITER_IOVEC);
+       dio->should_dirty = is_read && iter_is_iovec(iter);
 
        blk_start_plug(&plug);
        for (;;) {
index 68ca41dbbef387f93de28ca845c144fc37bbab79..80953528572db52af07ceae9ec782b284ab29610 100644 (file)
@@ -3201,9 +3201,6 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
                                struct btrfs_ioctl_space_info *space);
 void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
                               struct btrfs_ioctl_balance_args *bargs);
-int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
-                           struct file *dst_file, loff_t dst_loff,
-                           u64 olen);
 
 /* file.c */
 int __init btrfs_auto_defrag_init(void);
@@ -3233,8 +3230,9 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
                      size_t num_pages, loff_t pos, size_t write_bytes,
                      struct extent_state **cached);
 int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
-int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
-                          struct file *file_out, loff_t pos_out, u64 len);
+loff_t btrfs_remap_file_range(struct file *file_in, loff_t pos_in,
+                             struct file *file_out, loff_t pos_out,
+                             loff_t len, unsigned int remap_flags);
 
 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
index 97c7a086f7bd69f1dddb68fbdbf5c8bcc995024c..a3c22e16509b3067e65e86f1b98d2d9e9e84d99a 100644 (file)
@@ -3298,8 +3298,7 @@ const struct file_operations btrfs_file_operations = {
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = btrfs_compat_ioctl,
 #endif
-       .clone_file_range = btrfs_clone_file_range,
-       .dedupe_file_range = btrfs_dedupe_file_range,
+       .remap_file_range = btrfs_remap_file_range,
 };
 
 void __cold btrfs_auto_defrag_exit(void)
index a990a904513929d8e735ba00d3f120dbed975f6c..3ca6943827ef88e536b2d6c924e7664a3ab835e7 100644 (file)
@@ -3629,26 +3629,6 @@ out_unlock:
        return ret;
 }
 
-int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
-                           struct file *dst_file, loff_t dst_loff,
-                           u64 olen)
-{
-       struct inode *src = file_inode(src_file);
-       struct inode *dst = file_inode(dst_file);
-       u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
-
-       if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
-               /*
-                * Btrfs does not support blocksize < page_size. As a
-                * result, btrfs_cmp_data() won't correctly handle
-                * this situation without an update.
-                */
-               return -EINVAL;
-       }
-
-       return btrfs_extent_same(src, src_loff, olen, dst, dst_loff);
-}
-
 static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
                                     struct inode *inode,
                                     u64 endoff,
@@ -4350,10 +4330,34 @@ out_unlock:
        return ret;
 }
 
-int btrfs_clone_file_range(struct file *src_file, loff_t off,
-               struct file *dst_file, loff_t destoff, u64 len)
+loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
+               struct file *dst_file, loff_t destoff, loff_t len,
+               unsigned int remap_flags)
 {
-       return btrfs_clone_files(dst_file, src_file, off, len, destoff);
+       int ret;
+
+       if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+               return -EINVAL;
+
+       if (remap_flags & REMAP_FILE_DEDUP) {
+               struct inode *src = file_inode(src_file);
+               struct inode *dst = file_inode(dst_file);
+               u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
+
+               if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
+                       /*
+                        * Btrfs does not support blocksize < page_size. As a
+                        * result, btrfs_cmp_data() won't correctly handle
+                        * this situation without an update.
+                        */
+                       return -EINVAL;
+               }
+
+               ret = btrfs_extent_same(src, off, len, dst, destoff);
+       } else {
+               ret = btrfs_clone_files(dst_file, src_file, off, len, destoff);
+       }
+       return ret < 0 ? ret : len;
 }
 
 static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
index d60d61e8ed7de495bddd0bc799f16c2606a4c68b..1286c2b95498de47d2ba08b57a93901bdf4367bd 100644 (file)
@@ -3060,6 +3060,11 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
         */
        bio = bio_alloc(GFP_NOIO, 1);
 
+       if (wbc) {
+               wbc_init_bio(wbc, bio);
+               wbc_account_io(wbc, bh->b_page, bh->b_size);
+       }
+
        bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
        bio_set_dev(bio, bh->b_bdev);
        bio->bi_write_hint = write_hint;
@@ -3079,11 +3084,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
                op_flags |= REQ_PRIO;
        bio_set_op_attrs(bio, op, op_flags);
 
-       if (wbc) {
-               wbc_init_bio(wbc, bio);
-               wbc_account_io(wbc, bh->b_page, bh->b_size);
-       }
-
        submit_bio(bio);
        return 0;
 }
index f788496fafcc9eeab5907cdcd7c68b16e0aecf8a..189df668b6a0cf0dc2d000184d170ce58d248076 100644 (file)
@@ -615,7 +615,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
 
                more = len < iov_iter_count(to);
 
-               if (unlikely(to->type & ITER_PIPE)) {
+               if (unlikely(iov_iter_is_pipe(to))) {
                        ret = iov_iter_get_pages_alloc(to, &pages, len,
                                                       &page_off);
                        if (ret <= 0) {
@@ -662,7 +662,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
                        ret += zlen;
                }
 
-               if (unlikely(to->type & ITER_PIPE)) {
+               if (unlikely(iov_iter_is_pipe(to))) {
                        if (ret > 0) {
                                iov_iter_advance(to, ret);
                                off += ret;
@@ -815,7 +815,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
                                aio_req->total_len = rc + zlen;
                        }
 
-                       iov_iter_bvec(&i, ITER_BVEC, osd_data->bvec_pos.bvecs,
+                       iov_iter_bvec(&i, READ, osd_data->bvec_pos.bvecs,
                                      osd_data->num_bvecs,
                                      osd_data->bvec_pos.iter.bi_size);
                        iov_iter_advance(&i, rc);
@@ -1038,8 +1038,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
                                int zlen = min_t(size_t, len - ret,
                                                 size - pos - ret);
 
-                               iov_iter_bvec(&i, ITER_BVEC, bvecs, num_pages,
-                                             len);
+                               iov_iter_bvec(&i, READ, bvecs, num_pages, len);
                                iov_iter_advance(&i, ret);
                                iov_iter_zero(zlen, &i);
                                ret += zlen;
@@ -1932,10 +1931,17 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
        if (!prealloc_cf)
                return -ENOMEM;
 
-       /* Start by sync'ing the source file */
+       /* Start by sync'ing the source and destination files */
        ret = file_write_and_wait_range(src_file, src_off, (src_off + len));
-       if (ret < 0)
+       if (ret < 0) {
+               dout("failed to write src file (%zd)\n", ret);
+               goto out;
+       }
+       ret = file_write_and_wait_range(dst_file, dst_off, (dst_off + len));
+       if (ret < 0) {
+               dout("failed to write dst file (%zd)\n", ret);
                goto out;
+       }
 
        /*
         * We need FILE_WR caps for dst_ci and FILE_RD for src_ci as other
index 67a9aeb2f4ecdc66ea3cfd6131bf0e4082cb0691..bd13a3267ae03c401d7b0dd0c1f37626bbc42b0a 100644 (file)
@@ -80,12 +80,8 @@ static int parse_reply_info_in(void **p, void *end,
        info->symlink = *p;
        *p += info->symlink_len;
 
-       if (features & CEPH_FEATURE_DIRLAYOUTHASH)
-               ceph_decode_copy_safe(p, end, &info->dir_layout,
-                                     sizeof(info->dir_layout), bad);
-       else
-               memset(&info->dir_layout, 0, sizeof(info->dir_layout));
-
+       ceph_decode_copy_safe(p, end, &info->dir_layout,
+                             sizeof(info->dir_layout), bad);
        ceph_decode_32_safe(p, end, info->xattr_len, bad);
        ceph_decode_need(p, end, info->xattr_len, bad);
        info->xattr_data = *p;
@@ -3182,10 +3178,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
        recon_state.pagelist = pagelist;
        if (session->s_con.peer_features & CEPH_FEATURE_MDSENC)
                recon_state.msg_version = 3;
-       else if (session->s_con.peer_features & CEPH_FEATURE_FLOCK)
-               recon_state.msg_version = 2;
        else
-               recon_state.msg_version = 1;
+               recon_state.msg_version = 2;
        err = iterate_session_caps(session, encode_caps_cb, &recon_state);
        if (err < 0)
                goto fail;
index 32d4f13784ba5da85e420a565297eff6b3bf132a..03f4d24db8fe009dc4384b83162979c34f11d1e0 100644 (file)
@@ -237,7 +237,8 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
                ceph_put_snap_realm(mdsc, realm);
                realm = next;
        }
-       ceph_put_snap_realm(mdsc, realm);
+       if (realm)
+               ceph_put_snap_realm(mdsc, realm);
        up_read(&mdsc->snap_rwsem);
 
        return exceeded;
index 3e812428ac8d95210c78ac51bb84852b9ec9ad32..ba178b09de0b48f1ce0e2285899f66de91cd2741 100644 (file)
@@ -145,6 +145,58 @@ cifs_dump_iface(struct seq_file *m, struct cifs_server_iface *iface)
                seq_printf(m, "\t\tIPv6: %pI6\n", &ipv6->sin6_addr);
 }
 
+static int cifs_debug_files_proc_show(struct seq_file *m, void *v)
+{
+       struct list_head *stmp, *tmp, *tmp1, *tmp2;
+       struct TCP_Server_Info *server;
+       struct cifs_ses *ses;
+       struct cifs_tcon *tcon;
+       struct cifsFileInfo *cfile;
+
+       seq_puts(m, "# Version:1\n");
+       seq_puts(m, "# Format:\n");
+       seq_puts(m, "# <tree id> <persistent fid> <flags> <count> <pid> <uid>");
+#ifdef CONFIG_CIFS_DEBUG2
+       seq_printf(m, " <filename> <mid>\n");
+#else
+       seq_printf(m, " <filename>\n");
+#endif /* CIFS_DEBUG2 */
+       spin_lock(&cifs_tcp_ses_lock);
+       list_for_each(stmp, &cifs_tcp_ses_list) {
+               server = list_entry(stmp, struct TCP_Server_Info,
+                                   tcp_ses_list);
+               list_for_each(tmp, &server->smb_ses_list) {
+                       ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+                       list_for_each(tmp1, &ses->tcon_list) {
+                               tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
+                               spin_lock(&tcon->open_file_lock);
+                               list_for_each(tmp2, &tcon->openFileList) {
+                                       cfile = list_entry(tmp2, struct cifsFileInfo,
+                                                    tlist);
+                                       seq_printf(m,
+                                               "0x%x 0x%llx 0x%x %d %d %d %s",
+                                               tcon->tid,
+                                               cfile->fid.persistent_fid,
+                                               cfile->f_flags,
+                                               cfile->count,
+                                               cfile->pid,
+                                               from_kuid(&init_user_ns, cfile->uid),
+                                               cfile->dentry->d_name.name);
+#ifdef CONFIG_CIFS_DEBUG2
+                                       seq_printf(m, " 0x%llx\n", cfile->fid.mid);
+#else
+                                       seq_printf(m, "\n");
+#endif /* CIFS_DEBUG2 */
+                               }
+                               spin_unlock(&tcon->open_file_lock);
+                       }
+               }
+       }
+       spin_unlock(&cifs_tcp_ses_lock);
+       seq_putc(m, '\n');
+       return 0;
+}
+
 static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 {
        struct list_head *tmp1, *tmp2, *tmp3;
@@ -565,6 +617,9 @@ cifs_proc_init(void)
        proc_create_single("DebugData", 0, proc_fs_cifs,
                        cifs_debug_data_proc_show);
 
+       proc_create_single("open_files", 0400, proc_fs_cifs,
+                       cifs_debug_files_proc_show);
+
        proc_create("Stats", 0644, proc_fs_cifs, &cifs_stats_proc_fops);
        proc_create("cifsFYI", 0644, proc_fs_cifs, &cifsFYI_proc_fops);
        proc_create("traceSMB", 0644, proc_fs_cifs, &traceSMB_proc_fops);
@@ -601,6 +656,7 @@ cifs_proc_clean(void)
                return;
 
        remove_proc_entry("DebugData", proc_fs_cifs);
+       remove_proc_entry("open_files", proc_fs_cifs);
        remove_proc_entry("cifsFYI", proc_fs_cifs);
        remove_proc_entry("traceSMB", proc_fs_cifs);
        remove_proc_entry("Stats", proc_fs_cifs);
index b611fc2e8984e0fce26c08367b1306764685054a..7f01c6e607918d4e1356a9e7d305113777910089 100644 (file)
@@ -147,8 +147,10 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo)
                sprintf(dp, ";sec=krb5");
        else if (server->sec_mskerberos)
                sprintf(dp, ";sec=mskrb5");
-       else
-               goto out;
+       else {
+               cifs_dbg(VFS, "unknown or missing server auth type, use krb5\n");
+               sprintf(dp, ";sec=krb5");
+       }
 
        dp = description + strlen(description);
        sprintf(dp, ";uid=0x%x",
index 7de9603c54f10383c086f99dcfc5a5f17a8c716e..865706edb307dfd04fe66822b26c2461f87d49ab 100644 (file)
@@ -992,17 +992,21 @@ const struct inode_operations cifs_symlink_inode_ops = {
        .listxattr = cifs_listxattr,
 };
 
-static int cifs_clone_file_range(struct file *src_file, loff_t off,
-               struct file *dst_file, loff_t destoff, u64 len)
+static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
+               struct file *dst_file, loff_t destoff, loff_t len,
+               unsigned int remap_flags)
 {
        struct inode *src_inode = file_inode(src_file);
        struct inode *target_inode = file_inode(dst_file);
        struct cifsFileInfo *smb_file_src = src_file->private_data;
-       struct cifsFileInfo *smb_file_target = dst_file->private_data;
-       struct cifs_tcon *target_tcon = tlink_tcon(smb_file_target->tlink);
+       struct cifsFileInfo *smb_file_target;
+       struct cifs_tcon *target_tcon;
        unsigned int xid;
        int rc;
 
+       if (remap_flags & ~REMAP_FILE_ADVISORY)
+               return -EINVAL;
+
        cifs_dbg(FYI, "clone range\n");
 
        xid = get_xid();
@@ -1013,6 +1017,9 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off,
                goto out;
        }
 
+       smb_file_target = dst_file->private_data;
+       target_tcon = tlink_tcon(smb_file_target->tlink);
+
        /*
         * Note: cifs case is easier than btrfs since server responsible for
         * checks for proper open modes and file type and if it wants
@@ -1042,7 +1049,7 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off,
        unlock_two_nondirectories(src_inode, target_inode);
 out:
        free_xid(xid);
-       return rc;
+       return rc < 0 ? rc : len;
 }
 
 ssize_t cifs_file_copychunk_range(unsigned int xid,
@@ -1151,7 +1158,7 @@ const struct file_operations cifs_file_ops = {
        .llseek = cifs_llseek,
        .unlocked_ioctl = cifs_ioctl,
        .copy_file_range = cifs_copy_file_range,
-       .clone_file_range = cifs_clone_file_range,
+       .remap_file_range = cifs_remap_file_range,
        .setlease = cifs_setlease,
        .fallocate = cifs_fallocate,
 };
@@ -1170,15 +1177,14 @@ const struct file_operations cifs_file_strict_ops = {
        .llseek = cifs_llseek,
        .unlocked_ioctl = cifs_ioctl,
        .copy_file_range = cifs_copy_file_range,
-       .clone_file_range = cifs_clone_file_range,
+       .remap_file_range = cifs_remap_file_range,
        .setlease = cifs_setlease,
        .fallocate = cifs_fallocate,
 };
 
 const struct file_operations cifs_file_direct_ops = {
-       /* BB reevaluate whether they can be done with directio, no cache */
-       .read_iter = cifs_user_readv,
-       .write_iter = cifs_user_writev,
+       .read_iter = cifs_direct_readv,
+       .write_iter = cifs_direct_writev,
        .open = cifs_open,
        .release = cifs_close,
        .lock = cifs_lock,
@@ -1189,7 +1195,7 @@ const struct file_operations cifs_file_direct_ops = {
        .splice_write = iter_file_splice_write,
        .unlocked_ioctl  = cifs_ioctl,
        .copy_file_range = cifs_copy_file_range,
-       .clone_file_range = cifs_clone_file_range,
+       .remap_file_range = cifs_remap_file_range,
        .llseek = cifs_llseek,
        .setlease = cifs_setlease,
        .fallocate = cifs_fallocate,
@@ -1208,7 +1214,7 @@ const struct file_operations cifs_file_nobrl_ops = {
        .llseek = cifs_llseek,
        .unlocked_ioctl = cifs_ioctl,
        .copy_file_range = cifs_copy_file_range,
-       .clone_file_range = cifs_clone_file_range,
+       .remap_file_range = cifs_remap_file_range,
        .setlease = cifs_setlease,
        .fallocate = cifs_fallocate,
 };
@@ -1226,15 +1232,14 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
        .llseek = cifs_llseek,
        .unlocked_ioctl = cifs_ioctl,
        .copy_file_range = cifs_copy_file_range,
-       .clone_file_range = cifs_clone_file_range,
+       .remap_file_range = cifs_remap_file_range,
        .setlease = cifs_setlease,
        .fallocate = cifs_fallocate,
 };
 
 const struct file_operations cifs_file_direct_nobrl_ops = {
-       /* BB reevaluate whether they can be done with directio, no cache */
-       .read_iter = cifs_user_readv,
-       .write_iter = cifs_user_writev,
+       .read_iter = cifs_direct_readv,
+       .write_iter = cifs_direct_writev,
        .open = cifs_open,
        .release = cifs_close,
        .fsync = cifs_fsync,
@@ -1244,7 +1249,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
        .splice_write = iter_file_splice_write,
        .unlocked_ioctl  = cifs_ioctl,
        .copy_file_range = cifs_copy_file_range,
-       .clone_file_range = cifs_clone_file_range,
+       .remap_file_range = cifs_remap_file_range,
        .llseek = cifs_llseek,
        .setlease = cifs_setlease,
        .fallocate = cifs_fallocate,
@@ -1256,7 +1261,7 @@ const struct file_operations cifs_dir_ops = {
        .read    = generic_read_dir,
        .unlocked_ioctl  = cifs_ioctl,
        .copy_file_range = cifs_copy_file_range,
-       .clone_file_range = cifs_clone_file_range,
+       .remap_file_range = cifs_remap_file_range,
        .llseek = generic_file_llseek,
        .fsync = cifs_dir_fsync,
 };
index 24e265a51874653401d0ecce7fa74b2b582aa62c..4c3b5cfccc49aef63af46583f771c7d9302dca19 100644 (file)
@@ -101,8 +101,10 @@ extern int cifs_open(struct inode *inode, struct file *file);
 extern int cifs_close(struct inode *inode, struct file *file);
 extern int cifs_closedir(struct inode *inode, struct file *file);
 extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to);
+extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to);
 extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to);
 extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from);
+extern ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from);
 extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from);
 extern int cifs_lock(struct file *, int, struct file_lock *);
 extern int cifs_fsync(struct file *, loff_t, loff_t, int);
index ed1e0fcb69e3f606b8426091d63febea9e28dc29..38ab0fca49e1dafe66bf7dcc233e67ef42b6b301 100644 (file)
@@ -1125,6 +1125,9 @@ struct cifs_fid {
        __u8 create_guid[16];
        struct cifs_pending_open *pending_open;
        unsigned int epoch;
+#ifdef CONFIG_CIFS_DEBUG2
+       __u64 mid;
+#endif /* CIFS_DEBUG2 */
        bool purge_cache;
 };
 
@@ -1183,6 +1186,11 @@ struct cifs_aio_ctx {
        unsigned int            len;
        unsigned int            total_len;
        bool                    should_dirty;
+       /*
+        * Indicates if this aio_ctx is for direct_io.
+        * If yes, iter is a copy of the user-passed iov_iter.
+        */
+       bool                    direct_io;
 };
 
 struct cifs_readdata;
index 1ce733f3582f66702c18db771fac1416d702a706..79d842e7240c7cff027eebb5915cded50e8b8e3c 100644 (file)
@@ -1539,6 +1539,9 @@ struct reparse_symlink_data {
        char    PathBuffer[0];
 } __attribute__((packed));
 
+/* Flag above */
+#define SYMLINK_FLAG_RELATIVE 0x00000001
+
 /* For IO_REPARSE_TAG_NFS */
 #define NFS_SPECFILE_LNK       0x00000000014B4E4C
 #define NFS_SPECFILE_CHR       0x0000000000524843
index d82f0cc7175508e9881873a2ee9c8a53c1a72944..6f24f129a75135d2da48f414698d7a5439c11960 100644 (file)
@@ -589,7 +589,7 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
 {
        struct msghdr smb_msg;
        struct kvec iov = {.iov_base = buf, .iov_len = to_read};
-       iov_iter_kvec(&smb_msg.msg_iter, READ | ITER_KVEC, &iov, 1, to_read);
+       iov_iter_kvec(&smb_msg.msg_iter, READ, &iov, 1, to_read);
 
        return cifs_readv_from_socket(server, &smb_msg);
 }
@@ -601,7 +601,7 @@ cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page,
        struct msghdr smb_msg;
        struct bio_vec bv = {
                .bv_page = page, .bv_len = to_read, .bv_offset = page_offset};
-       iov_iter_bvec(&smb_msg.msg_iter, READ | ITER_BVEC, &bv, 1, to_read);
+       iov_iter_bvec(&smb_msg.msg_iter, READ, &bv, 1, to_read);
        return cifs_readv_from_socket(server, &smb_msg);
 }
 
index c620d4b5d5d4c84448cc44258ae1dd8ae36ce3a8..74c33d5fafc83ff841a43b15ad222a34acb860d4 100644 (file)
@@ -1005,7 +1005,7 @@ cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
  * Set the byte-range lock (mandatory style). Returns:
  * 1) 0, if we set the lock and don't need to request to the server;
  * 2) 1, if no locks prevent us but we need to request to the server;
- * 3) -EACCESS, if there is a lock that prevents us and wait is false.
+ * 3) -EACCES, if there is a lock that prevents us and wait is false.
  */
 static int
 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
@@ -2537,6 +2537,61 @@ wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
        return 0;
 }
 
+static int
+cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
+       struct cifs_aio_ctx *ctx)
+{
+       int wait_retry = 0;
+       unsigned int wsize, credits;
+       int rc;
+       struct TCP_Server_Info *server =
+               tlink_tcon(wdata->cfile->tlink)->ses->server;
+
+       /*
+        * Try to resend this wdata, waiting for credits up to 3 seconds.
+        * Note: we are attempting to resend the whole wdata, not in segments
+        */
+       do {
+               rc = server->ops->wait_mtu_credits(
+                       server, wdata->bytes, &wsize, &credits);
+
+               if (rc)
+                       break;
+
+               if (wsize < wdata->bytes) {
+                       add_credits_and_wake_if(server, credits, 0);
+                       msleep(1000);
+                       wait_retry++;
+               }
+       } while (wsize < wdata->bytes && wait_retry < 3);
+
+       if (wsize < wdata->bytes) {
+               rc = -EBUSY;
+               goto out;
+       }
+
+       rc = -EAGAIN;
+       while (rc == -EAGAIN) {
+               rc = 0;
+               if (wdata->cfile->invalidHandle)
+                       rc = cifs_reopen_file(wdata->cfile, false);
+               if (!rc)
+                       rc = server->ops->async_writev(wdata,
+                                       cifs_uncached_writedata_release);
+       }
+
+       if (!rc) {
+               list_add_tail(&wdata->list, wdata_list);
+               return 0;
+       }
+
+       add_credits_and_wake_if(server, wdata->credits, 0);
+out:
+       kref_put(&wdata->refcount, cifs_uncached_writedata_release);
+
+       return rc;
+}
+
 static int
 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
                     struct cifsFileInfo *open_file,
@@ -2551,6 +2606,8 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
        loff_t saved_offset = offset;
        pid_t pid;
        struct TCP_Server_Info *server;
+       struct page **pagevec;
+       size_t start;
 
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = open_file->pid;
@@ -2567,38 +2624,79 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
                if (rc)
                        break;
 
-               nr_pages = get_numpages(wsize, len, &cur_len);
-               wdata = cifs_writedata_alloc(nr_pages,
+               if (ctx->direct_io) {
+                       ssize_t result;
+
+                       result = iov_iter_get_pages_alloc(
+                               from, &pagevec, wsize, &start);
+                       if (result < 0) {
+                               cifs_dbg(VFS,
+                                       "direct_writev couldn't get user pages "
+                                       "(rc=%zd) iter type %d iov_offset %zd "
+                                       "count %zd\n",
+                                       result, from->type,
+                                       from->iov_offset, from->count);
+                               dump_stack();
+                               break;
+                       }
+                       cur_len = (size_t)result;
+                       iov_iter_advance(from, cur_len);
+
+                       nr_pages =
+                               (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
+
+                       wdata = cifs_writedata_direct_alloc(pagevec,
                                             cifs_uncached_writev_complete);
-               if (!wdata) {
-                       rc = -ENOMEM;
-                       add_credits_and_wake_if(server, credits, 0);
-                       break;
-               }
+                       if (!wdata) {
+                               rc = -ENOMEM;
+                               add_credits_and_wake_if(server, credits, 0);
+                               break;
+                       }
 
-               rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
-               if (rc) {
-                       kfree(wdata);
-                       add_credits_and_wake_if(server, credits, 0);
-                       break;
-               }
 
-               num_pages = nr_pages;
-               rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
-               if (rc) {
-                       for (i = 0; i < nr_pages; i++)
-                               put_page(wdata->pages[i]);
-                       kfree(wdata);
-                       add_credits_and_wake_if(server, credits, 0);
-                       break;
-               }
+                       wdata->page_offset = start;
+                       wdata->tailsz =
+                               nr_pages > 1 ?
+                                       cur_len - (PAGE_SIZE - start) -
+                                       (nr_pages - 2) * PAGE_SIZE :
+                                       cur_len;
+               } else {
+                       nr_pages = get_numpages(wsize, len, &cur_len);
+                       wdata = cifs_writedata_alloc(nr_pages,
+                                            cifs_uncached_writev_complete);
+                       if (!wdata) {
+                               rc = -ENOMEM;
+                               add_credits_and_wake_if(server, credits, 0);
+                               break;
+                       }
 
-               /*
-                * Bring nr_pages down to the number of pages we actually used,
-                * and free any pages that we didn't use.
-                */
-               for ( ; nr_pages > num_pages; nr_pages--)
-                       put_page(wdata->pages[nr_pages - 1]);
+                       rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
+                       if (rc) {
+                               kfree(wdata);
+                               add_credits_and_wake_if(server, credits, 0);
+                               break;
+                       }
+
+                       num_pages = nr_pages;
+                       rc = wdata_fill_from_iovec(
+                               wdata, from, &cur_len, &num_pages);
+                       if (rc) {
+                               for (i = 0; i < nr_pages; i++)
+                                       put_page(wdata->pages[i]);
+                               kfree(wdata);
+                               add_credits_and_wake_if(server, credits, 0);
+                               break;
+                       }
+
+                       /*
+                        * Bring nr_pages down to the number of pages we
+                        * actually used, and free any pages that we didn't use.
+                        */
+                       for ( ; nr_pages > num_pages; nr_pages--)
+                               put_page(wdata->pages[nr_pages - 1]);
+
+                       wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
+               }
 
                wdata->sync_mode = WB_SYNC_ALL;
                wdata->nr_pages = nr_pages;
@@ -2607,7 +2705,6 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
                wdata->pid = pid;
                wdata->bytes = cur_len;
                wdata->pagesz = PAGE_SIZE;
-               wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
                wdata->credits = credits;
                wdata->ctx = ctx;
                kref_get(&ctx->refcount);
@@ -2682,13 +2779,18 @@ restart_loop:
                                INIT_LIST_HEAD(&tmp_list);
                                list_del_init(&wdata->list);
 
-                               iov_iter_advance(&tmp_from,
+                               if (ctx->direct_io)
+                                       rc = cifs_resend_wdata(
+                                               wdata, &tmp_list, ctx);
+                               else {
+                                       iov_iter_advance(&tmp_from,
                                                 wdata->offset - ctx->pos);
 
-                               rc = cifs_write_from_iter(wdata->offset,
+                                       rc = cifs_write_from_iter(wdata->offset,
                                                wdata->bytes, &tmp_from,
                                                ctx->cfile, cifs_sb, &tmp_list,
                                                ctx);
+                               }
 
                                list_splice(&tmp_list, &ctx->list);
 
@@ -2701,8 +2803,9 @@ restart_loop:
                kref_put(&wdata->refcount, cifs_uncached_writedata_release);
        }
 
-       for (i = 0; i < ctx->npages; i++)
-               put_page(ctx->bv[i].bv_page);
+       if (!ctx->direct_io)
+               for (i = 0; i < ctx->npages; i++)
+                       put_page(ctx->bv[i].bv_page);
 
        cifs_stats_bytes_written(tcon, ctx->total_len);
        set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
@@ -2717,7 +2820,8 @@ restart_loop:
                complete(&ctx->done);
 }
 
-ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
+static ssize_t __cifs_writev(
+       struct kiocb *iocb, struct iov_iter *from, bool direct)
 {
        struct file *file = iocb->ki_filp;
        ssize_t total_written = 0;
@@ -2726,13 +2830,18 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
        struct cifs_sb_info *cifs_sb;
        struct cifs_aio_ctx *ctx;
        struct iov_iter saved_from = *from;
+       size_t len = iov_iter_count(from);
        int rc;
 
        /*
-        * BB - optimize the way when signing is disabled. We can drop this
-        * extra memory-to-memory copying and use iovec buffers for constructing
-        * write request.
+        * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
+        * In this case, fall back to non-direct write function.
+        * This could be improved by getting pages directly in ITER_KVEC.
         */
+       if (direct && from->type & ITER_KVEC) {
+               cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
+               direct = false;
+       }
 
        rc = generic_write_checks(iocb, from);
        if (rc <= 0)
@@ -2756,10 +2865,16 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
 
        ctx->pos = iocb->ki_pos;
 
-       rc = setup_aio_ctx_iter(ctx, from, WRITE);
-       if (rc) {
-               kref_put(&ctx->refcount, cifs_aio_ctx_release);
-               return rc;
+       if (direct) {
+               ctx->direct_io = true;
+               ctx->iter = *from;
+               ctx->len = len;
+       } else {
+               rc = setup_aio_ctx_iter(ctx, from, WRITE);
+               if (rc) {
+                       kref_put(&ctx->refcount, cifs_aio_ctx_release);
+                       return rc;
+               }
        }
 
        /* grab a lock here due to read response handlers can access ctx */
@@ -2809,6 +2924,16 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
        return total_written;
 }
 
+ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
+{
+       return __cifs_writev(iocb, from, true);
+}
+
+ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
+{
+       return __cifs_writev(iocb, from, false);
+}
+
 static ssize_t
 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
 {
@@ -2979,7 +3104,6 @@ cifs_uncached_readdata_release(struct kref *refcount)
        kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
        for (i = 0; i < rdata->nr_pages; i++) {
                put_page(rdata->pages[i]);
-               rdata->pages[i] = NULL;
        }
        cifs_readdata_release(refcount);
 }
@@ -3004,7 +3128,7 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
                size_t copy = min_t(size_t, remaining, PAGE_SIZE);
                size_t written;
 
-               if (unlikely(iter->type & ITER_PIPE)) {
+               if (unlikely(iov_iter_is_pipe(iter))) {
                        void *addr = kmap_atomic(page);
 
                        written = copy_to_iter(addr, copy, iter);
@@ -3106,6 +3230,67 @@ cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
        return uncached_fill_pages(server, rdata, iter, iter->count);
 }
 
+static int cifs_resend_rdata(struct cifs_readdata *rdata,
+                       struct list_head *rdata_list,
+                       struct cifs_aio_ctx *ctx)
+{
+       int wait_retry = 0;
+       unsigned int rsize, credits;
+       int rc;
+       struct TCP_Server_Info *server =
+               tlink_tcon(rdata->cfile->tlink)->ses->server;
+
+       /*
+        * Try to resend this rdata, waiting for credits up to 3 seconds.
+        * Note: we are attempting to resend the whole rdata, not in segments
+        */
+       do {
+               rc = server->ops->wait_mtu_credits(server, rdata->bytes,
+                                               &rsize, &credits);
+
+               if (rc)
+                       break;
+
+               if (rsize < rdata->bytes) {
+                       add_credits_and_wake_if(server, credits, 0);
+                       msleep(1000);
+                       wait_retry++;
+               }
+       } while (rsize < rdata->bytes && wait_retry < 3);
+
+       /*
+        * If we can't find enough credits to send this rdata
+        * release the rdata and return failure, this will pass
+        * whatever I/O amount we have finished to VFS.
+        */
+       if (rsize < rdata->bytes) {
+               rc = -EBUSY;
+               goto out;
+       }
+
+       rc = -EAGAIN;
+       while (rc == -EAGAIN) {
+               rc = 0;
+               if (rdata->cfile->invalidHandle)
+                       rc = cifs_reopen_file(rdata->cfile, true);
+               if (!rc)
+                       rc = server->ops->async_readv(rdata);
+       }
+
+       if (!rc) {
+               /* Add to aio pending list */
+               list_add_tail(&rdata->list, rdata_list);
+               return 0;
+       }
+
+       add_credits_and_wake_if(server, rdata->credits, 0);
+out:
+       kref_put(&rdata->refcount,
+               cifs_uncached_readdata_release);
+
+       return rc;
+}
+
 static int
 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
                     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
@@ -3117,6 +3302,9 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
        int rc;
        pid_t pid;
        struct TCP_Server_Info *server;
+       struct page **pagevec;
+       size_t start;
+       struct iov_iter direct_iov = ctx->iter;
 
        server = tlink_tcon(open_file->tlink)->ses->server;
 
@@ -3125,6 +3313,9 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
        else
                pid = current->tgid;
 
+       if (ctx->direct_io)
+               iov_iter_advance(&direct_iov, offset - ctx->pos);
+
        do {
                rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
                                                   &rsize, &credits);
@@ -3132,20 +3323,59 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
                        break;
 
                cur_len = min_t(const size_t, len, rsize);
-               npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
 
-               /* allocate a readdata struct */
-               rdata = cifs_readdata_alloc(npages,
+               if (ctx->direct_io) {
+                       ssize_t result;
+
+                       result = iov_iter_get_pages_alloc(
+                                       &direct_iov, &pagevec,
+                                       cur_len, &start);
+                       if (result < 0) {
+                               cifs_dbg(VFS,
+                                       "couldn't get user pages (cur_len=%zd)"
+                                       " iter type %d"
+                                       " iov_offset %zd count %zd\n",
+                                       result, direct_iov.type,
+                                       direct_iov.iov_offset,
+                                       direct_iov.count);
+                               dump_stack();
+                               break;
+                       }
+                       cur_len = (size_t)result;
+                       iov_iter_advance(&direct_iov, cur_len);
+
+                       rdata = cifs_readdata_direct_alloc(
+                                       pagevec, cifs_uncached_readv_complete);
+                       if (!rdata) {
+                               add_credits_and_wake_if(server, credits, 0);
+                               rc = -ENOMEM;
+                               break;
+                       }
+
+                       npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
+                       rdata->page_offset = start;
+                       rdata->tailsz = npages > 1 ?
+                               cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
+                               cur_len;
+
+               } else {
+
+                       npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
+                       /* allocate a readdata struct */
+                       rdata = cifs_readdata_alloc(npages,
                                            cifs_uncached_readv_complete);
-               if (!rdata) {
-                       add_credits_and_wake_if(server, credits, 0);
-                       rc = -ENOMEM;
-                       break;
-               }
+                       if (!rdata) {
+                               add_credits_and_wake_if(server, credits, 0);
+                               rc = -ENOMEM;
+                               break;
+                       }
 
-               rc = cifs_read_allocate_pages(rdata, npages);
-               if (rc)
-                       goto error;
+                       rc = cifs_read_allocate_pages(rdata, npages);
+                       if (rc)
+                               goto error;
+
+                       rdata->tailsz = PAGE_SIZE;
+               }
 
                rdata->cfile = cifsFileInfo_get(open_file);
                rdata->nr_pages = npages;
@@ -3153,7 +3383,6 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
                rdata->bytes = cur_len;
                rdata->pid = pid;
                rdata->pagesz = PAGE_SIZE;
-               rdata->tailsz = PAGE_SIZE;
                rdata->read_into_pages = cifs_uncached_read_into_pages;
                rdata->copy_into_pages = cifs_uncached_copy_into_pages;
                rdata->credits = credits;
@@ -3167,9 +3396,11 @@ error:
                if (rc) {
                        add_credits_and_wake_if(server, rdata->credits, 0);
                        kref_put(&rdata->refcount,
-                                cifs_uncached_readdata_release);
-                       if (rc == -EAGAIN)
+                               cifs_uncached_readdata_release);
+                       if (rc == -EAGAIN) {
+                               iov_iter_revert(&direct_iov, cur_len);
                                continue;
+                       }
                        break;
                }
 
@@ -3225,45 +3456,62 @@ again:
                                 * reading.
                                 */
                                if (got_bytes && got_bytes < rdata->bytes) {
-                                       rc = cifs_readdata_to_iov(rdata, to);
+                                       rc = 0;
+                                       if (!ctx->direct_io)
+                                               rc = cifs_readdata_to_iov(rdata, to);
                                        if (rc) {
                                                kref_put(&rdata->refcount,
-                                               cifs_uncached_readdata_release);
+                                                       cifs_uncached_readdata_release);
                                                continue;
                                        }
                                }
 
-                               rc = cifs_send_async_read(
+                               if (ctx->direct_io) {
+                                       /*
+                                        * Re-use rdata as this is a
+                                        * direct I/O
+                                        */
+                                       rc = cifs_resend_rdata(
+                                               rdata,
+                                               &tmp_list, ctx);
+                               } else {
+                                       rc = cifs_send_async_read(
                                                rdata->offset + got_bytes,
                                                rdata->bytes - got_bytes,
                                                rdata->cfile, cifs_sb,
                                                &tmp_list, ctx);
 
+                                       kref_put(&rdata->refcount,
+                                               cifs_uncached_readdata_release);
+                               }
+
                                list_splice(&tmp_list, &ctx->list);
 
-                               kref_put(&rdata->refcount,
-                                        cifs_uncached_readdata_release);
                                goto again;
                        } else if (rdata->result)
                                rc = rdata->result;
-                       else
+                       else if (!ctx->direct_io)
                                rc = cifs_readdata_to_iov(rdata, to);
 
                        /* if there was a short read -- discard anything left */
                        if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
                                rc = -ENODATA;
+
+                       ctx->total_len += rdata->got_bytes;
                }
                list_del_init(&rdata->list);
                kref_put(&rdata->refcount, cifs_uncached_readdata_release);
        }
 
-       for (i = 0; i < ctx->npages; i++) {
-               if (ctx->should_dirty)
-                       set_page_dirty(ctx->bv[i].bv_page);
-               put_page(ctx->bv[i].bv_page);
-       }
+       if (!ctx->direct_io) {
+               for (i = 0; i < ctx->npages; i++) {
+                       if (ctx->should_dirty)
+                               set_page_dirty(ctx->bv[i].bv_page);
+                       put_page(ctx->bv[i].bv_page);
+               }
 
-       ctx->total_len = ctx->len - iov_iter_count(to);
+               ctx->total_len = ctx->len - iov_iter_count(to);
+       }
 
        cifs_stats_bytes_read(tcon, ctx->total_len);
 
@@ -3281,18 +3529,28 @@ again:
                complete(&ctx->done);
 }
 
-ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
+static ssize_t __cifs_readv(
+       struct kiocb *iocb, struct iov_iter *to, bool direct)
 {
-       struct file *file = iocb->ki_filp;
-       ssize_t rc;
        size_t len;
-       ssize_t total_read = 0;
-       loff_t offset = iocb->ki_pos;
+       struct file *file = iocb->ki_filp;
        struct cifs_sb_info *cifs_sb;
-       struct cifs_tcon *tcon;
        struct cifsFileInfo *cfile;
+       struct cifs_tcon *tcon;
+       ssize_t rc, total_read = 0;
+       loff_t offset = iocb->ki_pos;
        struct cifs_aio_ctx *ctx;
 
+       /*
+        * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
+        * fall back to data copy read path
+        * this could be improved by getting pages directly in ITER_KVEC
+        */
+       if (direct && to->type & ITER_KVEC) {
+               cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
+               direct = false;
+       }
+
        len = iov_iter_count(to);
        if (!len)
                return 0;
@@ -3316,17 +3574,23 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
        if (!is_sync_kiocb(iocb))
                ctx->iocb = iocb;
 
-       if (to->type == ITER_IOVEC)
+       if (iter_is_iovec(to))
                ctx->should_dirty = true;
 
-       rc = setup_aio_ctx_iter(ctx, to, READ);
-       if (rc) {
-               kref_put(&ctx->refcount, cifs_aio_ctx_release);
-               return rc;
+       if (direct) {
+               ctx->pos = offset;
+               ctx->direct_io = true;
+               ctx->iter = *to;
+               ctx->len = len;
+       } else {
+               rc = setup_aio_ctx_iter(ctx, to, READ);
+               if (rc) {
+                       kref_put(&ctx->refcount, cifs_aio_ctx_release);
+                       return rc;
+               }
+               len = ctx->len;
        }
 
-       len = ctx->len;
-
        /* grab a lock here due to read response handlers can access ctx */
        mutex_lock(&ctx->aio_mutex);
 
@@ -3368,6 +3632,16 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
        return rc;
 }
 
+ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
+{
+       return __cifs_readv(iocb, to, true);
+}
+
+ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
+{
+       return __cifs_readv(iocb, to, false);
+}
+
 ssize_t
 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
 {
index 1023d78673fb14b38ba32c793edee92b119053e9..a81a9df997c1c113e0c00e8d0695d46fc05060f6 100644 (file)
@@ -1320,8 +1320,8 @@ cifs_drop_nlink(struct inode *inode)
 /*
  * If d_inode(dentry) is null (usually meaning the cached dentry
  * is a negative dentry) then we would attempt a standard SMB delete, but
- * if that fails we can not attempt the fall back mechanisms on EACCESS
- * but will return the EACCESS to the caller. Note that the VFS does not call
+ * if that fails we can not attempt the fall back mechanisms on EACCES
+ * but will return the EACCES to the caller. Note that the VFS does not call
  * unlink on negative dentries currently.
  */
 int cifs_unlink(struct inode *dir, struct dentry *dentry)
index fc43d5d25d1df1cb6bda302520172f5788d13c55..8a41f4eba7264fd37584b92bbcaa3035b487c649 100644 (file)
@@ -788,7 +788,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
        struct page **pages = NULL;
        struct bio_vec *bv = NULL;
 
-       if (iter->type & ITER_KVEC) {
+       if (iov_iter_is_kvec(iter)) {
                memcpy(&ctx->iter, iter, sizeof(struct iov_iter));
                ctx->len = count;
                iov_iter_advance(iter, count);
@@ -859,7 +859,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
        ctx->bv = bv;
        ctx->len = saved_len - count;
        ctx->npages = npages;
-       iov_iter_bvec(&ctx->iter, ITER_BVEC | rw, ctx->bv, npages, ctx->len);
+       iov_iter_bvec(&ctx->iter, rw, ctx->bv, npages, ctx->len);
        return 0;
 }
 
index f85fc5aa2710821189e7c95ca201470e9f02b4f4..225fec1cfa673360d794058e5acb1d5737acdd32 100644 (file)
@@ -747,6 +747,7 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size,
        int rc = 0;
        unsigned int ea_name_len = ea_name ? strlen(ea_name) : 0;
        char *name, *value;
+       size_t buf_size = dst_size;
        size_t name_len, value_len, user_name_len;
 
        while (src_size > 0) {
@@ -782,9 +783,10 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size,
                        /* 'user.' plus a terminating null */
                        user_name_len = 5 + 1 + name_len;
 
-                       rc += user_name_len;
-
-                       if (dst_size >= user_name_len) {
+                       if (buf_size == 0) {
+                               /* skip copy - calc size only */
+                               rc += user_name_len;
+                       } else if (dst_size >= user_name_len) {
                                dst_size -= user_name_len;
                                memcpy(dst, "user.", 5);
                                dst += 5;
@@ -792,8 +794,7 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size,
                                dst += name_len;
                                *dst = 0;
                                ++dst;
-                       } else if (dst_size == 0) {
-                               /* skip copy - calc size only */
+                               rc += user_name_len;
                        } else {
                                /* stop before overrun buffer */
                                rc = -ERANGE;
@@ -1078,6 +1079,9 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
 
        cfile->fid.persistent_fid = fid->persistent_fid;
        cfile->fid.volatile_fid = fid->volatile_fid;
+#ifdef CONFIG_CIFS_DEBUG2
+       cfile->fid.mid = fid->mid;
+#endif /* CIFS_DEBUG2 */
        server->ops->set_oplock_level(cinode, oplock, fid->epoch,
                                      &fid->purge_cache);
        cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
@@ -3152,13 +3156,13 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
                        return 0;
                }
 
-               iov_iter_bvec(&iter, WRITE | ITER_BVEC, bvec, npages, data_len);
+               iov_iter_bvec(&iter, WRITE, bvec, npages, data_len);
        } else if (buf_len >= data_offset + data_len) {
                /* read response payload is in buf */
                WARN_ONCE(npages > 0, "read data can be either in buf or in pages");
                iov.iov_base = buf + data_offset;
                iov.iov_len = data_len;
-               iov_iter_kvec(&iter, WRITE | ITER_KVEC, &iov, 1, data_len);
+               iov_iter_kvec(&iter, WRITE, &iov, 1, data_len);
        } else {
                /* read response payload cannot be in both buf and pages */
                WARN_ONCE(1, "buf can not contain only a part of read data");
index 7d7b016fe8bb0e9edd5523a739e94e37db7291d6..27f86537a5d11acebc45263236d9cdf131215337 100644 (file)
@@ -1512,7 +1512,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
        rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
        cifs_small_buf_release(req);
        rsp = (struct smb2_tree_connect_rsp *)rsp_iov.iov_base;
-
+       trace_smb3_tcon(xid, tcon->tid, ses->Suid, tree, rc);
        if (rc != 0) {
                if (tcon) {
                        cifs_stats_fail_inc(tcon, SMB2_TREE_CONNECT_HE);
@@ -1559,6 +1559,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
        if (tcon->ses->server->ops->validate_negotiate)
                rc = tcon->ses->server->ops->validate_negotiate(xid, tcon);
 tcon_exit:
+
        free_rsp_buf(resp_buftype, rsp);
        kfree(unc_path);
        return rc;
@@ -2308,6 +2309,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
        atomic_inc(&tcon->num_remote_opens);
        oparms->fid->persistent_fid = rsp->PersistentFileId;
        oparms->fid->volatile_fid = rsp->VolatileFileId;
+#ifdef CONFIG_CIFS_DEBUG2
+       oparms->fid->mid = le64_to_cpu(rsp->sync_hdr.MessageId);
+#endif /* CIFS_DEBUG2 */
 
        if (buf) {
                memcpy(buf, &rsp->CreationTime, 32);
index f753f424d7f111454e52ab651b321b8bf2011e38..5671d5ee7f58f68d62243eea12f6fe934311b6d2 100644 (file)
@@ -842,6 +842,41 @@ struct fsctl_get_integrity_information_rsp {
 /* Integrity flags for above */
 #define FSCTL_INTEGRITY_FLAG_CHECKSUM_ENFORCEMENT_OFF  0x00000001
 
+/* Reparse structures - see MS-FSCC 2.1.2 */
+
+/* struct fsctl_reparse_info_req is empty, only response structs (see below) */
+
+struct reparse_data_buffer {
+       __le32  ReparseTag;
+       __le16  ReparseDataLength;
+       __u16   Reserved;
+       __u8    DataBuffer[0]; /* Variable Length */
+} __packed;
+
+struct reparse_guid_data_buffer {
+       __le32  ReparseTag;
+       __le16  ReparseDataLength;
+       __u16   Reserved;
+       __u8    ReparseGuid[16];
+       __u8    DataBuffer[0]; /* Variable Length */
+} __packed;
+
+struct reparse_mount_point_data_buffer {
+       __le32  ReparseTag;
+       __le16  ReparseDataLength;
+       __u16   Reserved;
+       __le16  SubstituteNameOffset;
+       __le16  SubstituteNameLength;
+       __le16  PrintNameOffset;
+       __le16  PrintNameLength;
+       __u8    PathBuffer[0]; /* Variable Length */
+} __packed;
+
+/* See MS-FSCC 2.1.2.4 and cifspdu.h for struct reparse_symlink_data */
+
+/* See MS-FSCC 2.1.2.6 and cifspdu.h for struct reparse_posix_data */
+
+
 /* See MS-DFSC 2.2.2 */
 struct fsctl_get_dfs_referral_req {
        __le16 MaxReferralLevel;
index 5e282368cc4a9f3c43c8f4d402ce918f23753846..e94a8d1d08a3cedba8886f690a767739f97111d1 100644 (file)
@@ -2054,14 +2054,22 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
 
        info->smbd_recv_pending++;
 
-       switch (msg->msg_iter.type) {
-       case READ | ITER_KVEC:
+       if (iov_iter_rw(&msg->msg_iter) == WRITE) {
+               /* It's a bug in upper layer to get there */
+               cifs_dbg(VFS, "CIFS: invalid msg iter dir %u\n",
+                        iov_iter_rw(&msg->msg_iter));
+               rc = -EINVAL;
+               goto out;
+       }
+
+       switch (iov_iter_type(&msg->msg_iter)) {
+       case ITER_KVEC:
                buf = msg->msg_iter.kvec->iov_base;
                to_read = msg->msg_iter.kvec->iov_len;
                rc = smbd_recv_buf(info, buf, to_read);
                break;
 
-       case READ | ITER_BVEC:
+       case ITER_BVEC:
                page = msg->msg_iter.bvec->bv_page;
                page_offset = msg->msg_iter.bvec->bv_offset;
                to_read = msg->msg_iter.bvec->bv_len;
@@ -2071,10 +2079,11 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
        default:
                /* It's a bug in upper layer to get there */
                cifs_dbg(VFS, "CIFS: invalid msg type %d\n",
-                       msg->msg_iter.type);
+                        iov_iter_type(&msg->msg_iter));
                rc = -EINVAL;
        }
 
+out:
        info->smbd_recv_pending--;
        wake_up(&info->wait_smbd_recv_pending);
 
index cce8414fe7ec2b5e3af9c0660c6db033828cb86f..fb049809555fea9b3e2cc072b735ac0adfdfe1ae 100644 (file)
@@ -373,6 +373,48 @@ DEFINE_EVENT(smb3_enter_exit_class, smb3_##name,  \
 DEFINE_SMB3_ENTER_EXIT_EVENT(enter);
 DEFINE_SMB3_ENTER_EXIT_EVENT(exit_done);
 
+/*
+ * For SMB2/SMB3 tree connect
+ */
+
+DECLARE_EVENT_CLASS(smb3_tcon_class,
+       TP_PROTO(unsigned int xid,
+               __u32   tid,
+               __u64   sesid,
+               const char *unc_name,
+               int     rc),
+       TP_ARGS(xid, tid, sesid, unc_name, rc),
+       TP_STRUCT__entry(
+               __field(unsigned int, xid)
+               __field(__u32, tid)
+               __field(__u64, sesid)
+               __field(const char *,  unc_name)
+               __field(int, rc)
+       ),
+       TP_fast_assign(
+               __entry->xid = xid;
+               __entry->tid = tid;
+               __entry->sesid = sesid;
+               __entry->unc_name = unc_name;
+               __entry->rc = rc;
+       ),
+       TP_printk("xid=%u sid=0x%llx tid=0x%x unc_name=%s rc=%d",
+               __entry->xid, __entry->sesid, __entry->tid,
+               __entry->unc_name, __entry->rc)
+)
+
+#define DEFINE_SMB3_TCON_EVENT(name)          \
+DEFINE_EVENT(smb3_tcon_class, smb3_##name,    \
+       TP_PROTO(unsigned int xid,              \
+               __u32   tid,                    \
+               __u64   sesid,                  \
+               const char *unc_name,           \
+               int     rc),                    \
+       TP_ARGS(xid, tid, sesid, unc_name, rc))
+
+DEFINE_SMB3_TCON_EVENT(tcon);
+
+
 /*
  * For smb2/smb3 open call
  */
index f8112433f0c8b3fa90911bc8451b631a5b9b1ef8..83ff0c25710d0861efad84a38d1da01948bbef54 100644 (file)
@@ -316,8 +316,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
                        .iov_base = &rfc1002_marker,
                        .iov_len  = 4
                };
-               iov_iter_kvec(&smb_msg.msg_iter, WRITE | ITER_KVEC, &hiov,
-                             1, 4);
+               iov_iter_kvec(&smb_msg.msg_iter, WRITE, &hiov, 1, 4);
                rc = smb_send_kvec(server, &smb_msg, &sent);
                if (rc < 0)
                        goto uncork;
@@ -338,8 +337,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
                        size += iov[i].iov_len;
                }
 
-               iov_iter_kvec(&smb_msg.msg_iter, WRITE | ITER_KVEC,
-                             iov, n_vec, size);
+               iov_iter_kvec(&smb_msg.msg_iter, WRITE, iov, n_vec, size);
 
                rc = smb_send_kvec(server, &smb_msg, &sent);
                if (rc < 0)
@@ -355,7 +353,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
                        rqst_page_get_length(&rqst[j], i, &bvec.bv_len,
                                             &bvec.bv_offset);
 
-                       iov_iter_bvec(&smb_msg.msg_iter, WRITE | ITER_BVEC,
+                       iov_iter_bvec(&smb_msg.msg_iter, WRITE,
                                      &bvec, 1, bvec.bv_len);
                        rc = smb_send_kvec(server, &smb_msg, &sent);
                        if (rc < 0)
index 093fb54cd3163d96b03ebf3b0bea4a83a26bb02a..722d17c88edb9016ac910072f4f758605089ec83 100644 (file)
@@ -1313,7 +1313,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
        spin_lock_init(&dio->bio_lock);
        dio->refcount = 1;
 
-       dio->should_dirty = (iter->type == ITER_IOVEC);
+       dio->should_dirty = iter_is_iovec(iter) && iov_iter_rw(iter) == READ;
        sdio.iter = iter;
        sdio.final_block_in_request = end >> blkbits;
 
index a5e4a221435c04bdf97e1219ca3bd022ca864e6c..76976d6e50f93ac4fad8be97ab0566bcfce6c775 100644 (file)
@@ -674,7 +674,7 @@ static int receive_from_sock(struct connection *con)
                nvec = 2;
        }
        len = iov[0].iov_len + iov[1].iov_len;
-       iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nvec, len);
+       iov_iter_kvec(&msg.msg_iter, READ, iov, nvec, len);
 
        r = ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT | MSG_NOSIGNAL);
        if (ret <= 0)
index 41cf2fbee50da4cb9ec0999a967d7d777fb7fc45..906839a4da8ff8215f7a18f26030cfbc18b9b0e3 100644 (file)
@@ -101,6 +101,7 @@ static int parse_options(char *options, struct exofs_mountopt *opts)
                token = match_token(p, tokens, args);
                switch (token) {
                case Opt_name:
+                       kfree(opts->dev_name);
                        opts->dev_name = match_strdup(&args[0]);
                        if (unlikely(!opts->dev_name)) {
                                EXOFS_ERR("Error allocating dev_name");
@@ -117,7 +118,7 @@ static int parse_options(char *options, struct exofs_mountopt *opts)
                                          EXOFS_MIN_PID);
                                return -EINVAL;
                        }
-                       s_pid = 1;
+                       s_pid = true;
                        break;
                case Opt_to:
                        if (match_int(&args[0], &option))
@@ -866,8 +867,10 @@ static struct dentry *exofs_mount(struct file_system_type *type,
        int ret;
 
        ret = parse_options(data, &opts);
-       if (ret)
+       if (ret) {
+               kfree(opts.dev_name);
                return ERR_PTR(ret);
+       }
 
        if (!opts.dev_name)
                opts.dev_name = dev_name;
index 12f90d48ba6137c05673dd918feb6aca9281f4d2..3f89d0ab08fc4c8355d207c4f27f7019f08122bd 100644 (file)
 
 #include <linux/compiler.h>
 
-/* Until this gets included into linux/compiler-gcc.h */
-#ifndef __nonstring
-#if defined(GCC_VERSION) && (GCC_VERSION >= 80000)
-#define __nonstring __attribute__((nonstring))
-#else
-#define __nonstring
-#endif
-#endif
-
 /*
  * The fourth extended filesystem constants/structures
  */
index 2addcb8730e19afedcabac390e775124e6a66559..014f6a698cb712a25929f135238cb88614360d31 100644 (file)
@@ -1216,7 +1216,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
        bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
        bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
        if (IS_ERR(bitmap_bh))
-               return (struct inode *) bitmap_bh;
+               return ERR_CAST(bitmap_bh);
 
        /* Having the inode bit set should be a 100% indicator that this
         * is a valid orphan (no e2fsck run on fs).  Orphans also include
index 67a38532032ae89cfcbaec35fe139992cbf2875e..17adcb16a9c85f8fee50f2796ed10c79f7e32cd7 100644 (file)
@@ -1556,7 +1556,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
 
        bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
        if (IS_ERR(bh))
-               return (struct dentry *) bh;
+               return ERR_CAST(bh);
        inode = NULL;
        if (bh) {
                __u32 ino = le32_to_cpu(de->inode);
@@ -1600,7 +1600,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
 
        bh = ext4_find_entry(d_inode(child), &dotdot, &de, NULL);
        if (IS_ERR(bh))
-               return (struct dentry *) bh;
+               return ERR_CAST(bh);
        if (!bh)
                return ERR_PTR(-ENOENT);
        ino = le32_to_cpu(de->inode);
index 2aa62d58d8dd87e095bcb61f84aa78ef755463a1..db7590178dfcf1a4b59ee3c44deaa89a21de8ca6 100644 (file)
@@ -374,13 +374,13 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
        bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
        if (!bio)
                return -ENOMEM;
+       wbc_init_bio(io->io_wbc, bio);
        bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
        bio_set_dev(bio, bh->b_bdev);
        bio->bi_end_io = ext4_end_bio;
        bio->bi_private = ext4_get_io_end(io->io_end);
        io->io_bio = bio;
        io->io_next_block = bh->b_blocknr;
-       wbc_init_bio(io->io_wbc, bio);
        return 0;
 }
 
index 58dbc39fea639ef5e0dc95dd830e47c9763ffe29..cc2121b37bf5f7d3bb0a57398a2abfc6948ad321 100644 (file)
@@ -1275,7 +1275,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
        ssize_t ret = 0;
 
        /* Special case for kernel I/O: can copy directly into the buffer */
-       if (ii->type & ITER_KVEC) {
+       if (iov_iter_is_kvec(ii)) {
                unsigned long user_addr = fuse_get_user_addr(ii);
                size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
 
index 2005529af560891043170b4d86ed05c2a62f19eb..d64f622cac8b8f7a9cb0fdd842ce0f788f857c8d 100644 (file)
@@ -223,6 +223,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
                             u64 off, u64 olen, u64 destoff)
 {
        struct fd src_file = fdget(srcfd);
+       loff_t cloned;
        int ret;
 
        if (!src_file.file)
@@ -230,7 +231,14 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
        ret = -EXDEV;
        if (src_file.file->f_path.mnt != dst_file->f_path.mnt)
                goto fdput;
-       ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen);
+       cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff,
+                                     olen, 0);
+       if (cloned < 0)
+               ret = cloned;
+       else if (olen && cloned != olen)
+               ret = -EINVAL;
+       else
+               ret = 0;
 fdput:
        fdput(src_file);
        return ret;
@@ -669,6 +677,9 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
                return ioctl_fiemap(filp, arg);
 
        case FIGETBSZ:
+               /* anon_bdev filesystems may not have a block size */
+               if (!inode->i_sb->s_blocksize)
+                       return -EINVAL;
                return put_user(inode->i_sb->s_blocksize, argp);
 
        case FICLONE:
index 90c2febc93acc716d1db6f69ca1e287f7cbac6f5..64ce240217a18985dd0510fd968b257dce214cd4 100644 (file)
@@ -30,7 +30,6 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/dax.h>
 #include <linux/sched/signal.h>
-#include <linux/swap.h>
 
 #include "internal.h"
 
@@ -1795,7 +1794,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                if (pos >= dio->i_size)
                        goto out_free_dio;
 
-               if (iter->type == ITER_IOVEC)
+               if (iter_is_iovec(iter) && iov_iter_rw(iter) == READ)
                        dio->flags |= IOMAP_DIO_DIRTY;
        } else {
                flags |= IOMAP_WRITE;
index 98d27da43304706f4c8dcc572a397d89ff34cef2..74f64294a410838e45fa5f4cf83757fa8b46e844 100644 (file)
@@ -1540,8 +1540,13 @@ static int do_umount(struct mount *mnt, int flags)
 
        namespace_lock();
        lock_mount_hash();
-       event++;
 
+       /* Recheck MNT_LOCKED with the locks held */
+       retval = -EINVAL;
+       if (mnt->mnt.mnt_flags & MNT_LOCKED)
+               goto out;
+
+       event++;
        if (flags & MNT_DETACH) {
                if (!list_empty(&mnt->mnt_list))
                        umount_tree(mnt, UMOUNT_PROPAGATE);
@@ -1555,6 +1560,7 @@ static int do_umount(struct mount *mnt, int flags)
                        retval = 0;
                }
        }
+out:
        unlock_mount_hash();
        namespace_unlock();
        return retval;
@@ -1645,7 +1651,7 @@ int ksys_umount(char __user *name, int flags)
                goto dput_and_out;
        if (!check_mnt(mnt))
                goto dput_and_out;
-       if (mnt->mnt.mnt_flags & MNT_LOCKED)
+       if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */
                goto dput_and_out;
        retval = -EPERM;
        if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
@@ -1728,8 +1734,14 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
                for (s = r; s; s = next_mnt(s, r)) {
                        if (!(flag & CL_COPY_UNBINDABLE) &&
                            IS_MNT_UNBINDABLE(s)) {
-                               s = skip_mnt_tree(s);
-                               continue;
+                               if (s->mnt.mnt_flags & MNT_LOCKED) {
+                                       /* Both unbindable and locked. */
+                                       q = ERR_PTR(-EPERM);
+                                       goto out;
+                               } else {
+                                       s = skip_mnt_tree(s);
+                                       continue;
+                               }
                        }
                        if (!(flag & CL_COPY_MNT_NS_FILE) &&
                            is_mnt_ns_file(s->mnt.mnt_root)) {
@@ -1782,7 +1794,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
 {
        namespace_lock();
        lock_mount_hash();
-       umount_tree(real_mount(mnt), UMOUNT_SYNC);
+       umount_tree(real_mount(mnt), 0);
        unlock_mount_hash();
        namespace_unlock();
 }
index 4288a6ecaf756361bf134af1b790cb15fd9c02d6..46d691ba04bc8fdb38b28beca6eb445c50b7443a 100644 (file)
@@ -180,8 +180,9 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
        return nfs42_proc_allocate(filep, offset, len);
 }
 
-static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
-               struct file *dst_file, loff_t dst_off, u64 count)
+static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off,
+               struct file *dst_file, loff_t dst_off, loff_t count,
+               unsigned int remap_flags)
 {
        struct inode *dst_inode = file_inode(dst_file);
        struct nfs_server *server = NFS_SERVER(dst_inode);
@@ -190,6 +191,9 @@ static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
        bool same_inode = false;
        int ret;
 
+       if (remap_flags & ~REMAP_FILE_ADVISORY)
+               return -EINVAL;
+
        /* check alignment w.r.t. clone_blksize */
        ret = -EINVAL;
        if (bs) {
@@ -240,7 +244,7 @@ out_unlock:
                inode_unlock(src_inode);
        }
 out:
-       return ret;
+       return ret < 0 ? ret : count;
 }
 #endif /* CONFIG_NFS_V4_2 */
 
@@ -262,7 +266,7 @@ const struct file_operations nfs4_file_operations = {
        .copy_file_range = nfs4_copy_file_range,
        .llseek         = nfs4_file_llseek,
        .fallocate      = nfs42_fallocate,
-       .clone_file_range = nfs42_clone_file_range,
+       .remap_file_range = nfs42_remap_file_range,
 #else
        .llseek         = nfs_file_llseek,
 #endif
index db84b4adbc491d7cd62e782ac7440a71c3a6c764..867457d6dfbe54060ae6f152ca2a07dfa605ff71 100644 (file)
@@ -3788,7 +3788,7 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
        }
 
        /*
-        * -EACCESS could mean that the user doesn't have correct permissions
+        * -EACCES could mean that the user doesn't have correct permissions
         * to access the mount.  It could also mean that we tried to mount
         * with a gss auth flavor, but rpc.gssd isn't running.  Either way,
         * existing mount programs don't handle -EACCES very well so it should
index 2751976704e9388239fbb3742001e261ca09bdfe..eb67098117b4c09eeaa1bf81f08139deba4dd23a 100644 (file)
@@ -541,8 +541,12 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
 __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
                u64 dst_pos, u64 count)
 {
-       return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos,
-                                            count));
+       loff_t cloned;
+
+       cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
+       if (count && cloned != count)
+               cloned = -EINVAL;
+       return nfserrno(cloned < 0 ? cloned : 0);
 }
 
 ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
@@ -923,7 +927,7 @@ __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
        int host_err;
 
        trace_nfsd_read_vector(rqstp, fhp, offset, *count);
-       iov_iter_kvec(&iter, READ | ITER_KVEC, vec, vlen, *count);
+       iov_iter_kvec(&iter, READ, vec, vlen, *count);
        host_err = vfs_iter_read(file, &iter, &offset, 0);
        return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
 }
@@ -999,7 +1003,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
        if (stable && !use_wgather)
                flags |= RWF_SYNC;
 
-       iov_iter_kvec(&iter, WRITE | ITER_KVEC, vec, vlen, *cnt);
+       iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
        host_err = vfs_iter_write(file, &iter, &pos, flags);
        if (host_err < 0)
                goto out_nfserr;
index 4690cd75d8d7948a056fe899bc4600ade10b8566..3986c7a1f6a88c2b0ed421a1f6fbc45c7a68405c 100644 (file)
@@ -312,7 +312,7 @@ static struct dentry *ntfs_get_parent(struct dentry *child_dent)
        /* Get the mft record of the inode belonging to the child dentry. */
        mrec = map_mft_record(ni);
        if (IS_ERR(mrec))
-               return (struct dentry *)mrec;
+               return ERR_CAST(mrec);
        /* Find the first file name attribute in the mft record. */
        ctx = ntfs_attr_get_search_ctx(ni, mrec);
        if (unlikely(!ctx)) {
index 1d098c3c00e023540d6f0665720390647945af58..4ebbd57cbf8460da741860a4e657ab5fa60f6d7b 100644 (file)
@@ -99,25 +99,34 @@ out:
        return ret;
 }
 
+/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it
+ * will be easier to handle read failure.
+ */
 int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
                           unsigned int nr, struct buffer_head *bhs[])
 {
        int status = 0;
        unsigned int i;
        struct buffer_head *bh;
+       int new_bh = 0;
 
        trace_ocfs2_read_blocks_sync((unsigned long long)block, nr);
 
        if (!nr)
                goto bail;
 
+       /* Don't put buffer head and re-assign it to NULL if it is allocated
+        * outside since the caller can't be aware of this alternation!
+        */
+       new_bh = (bhs[0] == NULL);
+
        for (i = 0 ; i < nr ; i++) {
                if (bhs[i] == NULL) {
                        bhs[i] = sb_getblk(osb->sb, block++);
                        if (bhs[i] == NULL) {
                                status = -ENOMEM;
                                mlog_errno(status);
-                               goto bail;
+                               break;
                        }
                }
                bh = bhs[i];
@@ -158,9 +167,26 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
                submit_bh(REQ_OP_READ, 0, bh);
        }
 
+read_failure:
        for (i = nr; i > 0; i--) {
                bh = bhs[i - 1];
 
+               if (unlikely(status)) {
+                       if (new_bh && bh) {
+                               /* If middle bh fails, let previous bh
+                                * finish its read and then put it to
+                                * avoid bh leak
+                                */
+                               if (!buffer_jbd(bh))
+                                       wait_on_buffer(bh);
+                               put_bh(bh);
+                               bhs[i - 1] = NULL;
+                       } else if (bh && buffer_uptodate(bh)) {
+                               clear_buffer_uptodate(bh);
+                       }
+                       continue;
+               }
+
                /* No need to wait on the buffer if it's managed by JBD. */
                if (!buffer_jbd(bh))
                        wait_on_buffer(bh);
@@ -170,8 +196,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
                         * so we can safely record this and loop back
                         * to cleanup the other buffers. */
                        status = -EIO;
-                       put_bh(bh);
-                       bhs[i - 1] = NULL;
+                       goto read_failure;
                }
        }
 
@@ -179,6 +204,9 @@ bail:
        return status;
 }
 
+/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it
+ * will be easier to handle read failure.
+ */
 int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
                      struct buffer_head *bhs[], int flags,
                      int (*validate)(struct super_block *sb,
@@ -188,6 +216,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
        int i, ignore_cache = 0;
        struct buffer_head *bh;
        struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+       int new_bh = 0;
 
        trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags);
 
@@ -213,6 +242,11 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
                goto bail;
        }
 
+       /* Don't put buffer head and re-assign it to NULL if it is allocated
+        * outside since the caller can't be aware of this alteration!
+        */
+       new_bh = (bhs[0] == NULL);
+
        ocfs2_metadata_cache_io_lock(ci);
        for (i = 0 ; i < nr ; i++) {
                if (bhs[i] == NULL) {
@@ -221,7 +255,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
                                ocfs2_metadata_cache_io_unlock(ci);
                                status = -ENOMEM;
                                mlog_errno(status);
-                               goto bail;
+                               /* Don't forget to put previous bh! */
+                               break;
                        }
                }
                bh = bhs[i];
@@ -316,16 +351,27 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
                }
        }
 
-       status = 0;
-
+read_failure:
        for (i = (nr - 1); i >= 0; i--) {
                bh = bhs[i];
 
                if (!(flags & OCFS2_BH_READAHEAD)) {
-                       if (status) {
-                               /* Clear the rest of the buffers on error */
-                               put_bh(bh);
-                               bhs[i] = NULL;
+                       if (unlikely(status)) {
+                               /* Clear the buffers on error, including those
+                                * that ever succeeded in reading
+                                */
+                               if (new_bh && bh) {
+                                       /* If middle bh fails, let previous bh
+                                        * finish its read and then put it to
+                                        * avoid bh leak
+                                        */
+                                       if (!buffer_jbd(bh))
+                                               wait_on_buffer(bh);
+                                       put_bh(bh);
+                                       bhs[i] = NULL;
+                               } else if (bh && buffer_uptodate(bh)) {
+                                       clear_buffer_uptodate(bh);
+                               }
                                continue;
                        }
                        /* We know this can't have changed as we hold the
@@ -343,9 +389,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
                                 * uptodate. */
                                status = -EIO;
                                clear_buffer_needs_validate(bh);
-                               put_bh(bh);
-                               bhs[i] = NULL;
-                               continue;
+                               goto read_failure;
                        }
 
                        if (buffer_needs_validate(bh)) {
@@ -355,11 +399,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
                                BUG_ON(buffer_jbd(bh));
                                clear_buffer_needs_validate(bh);
                                status = validate(sb, bh);
-                               if (status) {
-                                       put_bh(bh);
-                                       bhs[i] = NULL;
-                                       continue;
-                               }
+                               if (status)
+                                       goto read_failure;
                        }
                }
 
index 7d9eea7d4a87339a58a1f17066c476af59d93642..e9f236af1927d3aaf4bf495ae4a35234c5c94041 100644 (file)
@@ -916,7 +916,7 @@ static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len)
 {
        struct kvec vec = { .iov_len = len, .iov_base = data, };
        struct msghdr msg = { .msg_flags = MSG_DONTWAIT, };
-       iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, len);
+       iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, len);
        return sock_recvmsg(sock, &msg, MSG_DONTWAIT);
 }
 
index b048d4fa3959081bd1a857f0283d398b84515752..c121abbdfc7dbcfb28675aa7e62a4cb9a70633a1 100644 (file)
@@ -1897,8 +1897,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
                                /* On error, skip the f_pos to the
                                   next block. */
                                ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
-                               brelse(bh);
-                               continue;
+                               break;
                        }
                        if (le64_to_cpu(de->inode)) {
                                unsigned char d_type = DT_UNKNOWN;
index 933aac5da193415643b34a33e14db4fdb6fc29b5..7c835824247eb7a64446467b03080f07f67aa90e 100644 (file)
@@ -2123,10 +2123,10 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
 
 /* LVB only has room for 64 bits of time here so we pack it for
  * now. */
-static u64 ocfs2_pack_timespec(struct timespec *spec)
+static u64 ocfs2_pack_timespec(struct timespec64 *spec)
 {
        u64 res;
-       u64 sec = spec->tv_sec;
+       u64 sec = clamp_t(time64_t, spec->tv_sec, 0, 0x3ffffffffull);
        u32 nsec = spec->tv_nsec;
 
        res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
@@ -2142,7 +2142,6 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
        struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
        struct ocfs2_meta_lvb *lvb;
-       struct timespec ts;
 
        lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
 
@@ -2163,15 +2162,12 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
        lvb->lvb_igid      = cpu_to_be32(i_gid_read(inode));
        lvb->lvb_imode     = cpu_to_be16(inode->i_mode);
        lvb->lvb_inlink    = cpu_to_be16(inode->i_nlink);
-       ts = timespec64_to_timespec(inode->i_atime);
        lvb->lvb_iatime_packed  =
-               cpu_to_be64(ocfs2_pack_timespec(&ts));
-       ts = timespec64_to_timespec(inode->i_ctime);
+               cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
        lvb->lvb_ictime_packed =
-               cpu_to_be64(ocfs2_pack_timespec(&ts));
-       ts = timespec64_to_timespec(inode->i_mtime);
+               cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
        lvb->lvb_imtime_packed =
-               cpu_to_be64(ocfs2_pack_timespec(&ts));
+               cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
        lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
        lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
        lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
@@ -2180,7 +2176,7 @@ out:
        mlog_meta_lvb(0, lockres);
 }
 
-static void ocfs2_unpack_timespec(struct timespec *spec,
+static void ocfs2_unpack_timespec(struct timespec64 *spec,
                                  u64 packed_time)
 {
        spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
@@ -2189,7 +2185,6 @@ static void ocfs2_unpack_timespec(struct timespec *spec,
 
 static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
 {
-       struct timespec ts;
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
        struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
        struct ocfs2_meta_lvb *lvb;
@@ -2217,15 +2212,12 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
        i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
        inode->i_mode    = be16_to_cpu(lvb->lvb_imode);
        set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
-       ocfs2_unpack_timespec(&ts,
+       ocfs2_unpack_timespec(&inode->i_atime,
                              be64_to_cpu(lvb->lvb_iatime_packed));
-       inode->i_atime = timespec_to_timespec64(ts);
-       ocfs2_unpack_timespec(&ts,
+       ocfs2_unpack_timespec(&inode->i_mtime,
                              be64_to_cpu(lvb->lvb_imtime_packed));
-       inode->i_mtime = timespec_to_timespec64(ts);
-       ocfs2_unpack_timespec(&ts,
+       ocfs2_unpack_timespec(&inode->i_ctime,
                              be64_to_cpu(lvb->lvb_ictime_packed));
-       inode->i_ctime = timespec_to_timespec64(ts);
        spin_unlock(&oi->ip_lock);
 }
 
@@ -3603,7 +3595,7 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
         * we can recover correctly from node failure. Otherwise, we may get
         * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
         */
-       if (!ocfs2_is_o2cb_active() &&
+       if (ocfs2_userspace_stack(osb) &&
            lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
                lvb = 1;
 
index 9fa35cb6f6e0b5b38023f45512fc75200be4c694..d640c5f8a85da8fc4ba030b295db0233b377ea7f 100644 (file)
@@ -2343,7 +2343,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
 
        written = __generic_file_write_iter(iocb, from);
        /* buffered aio wouldn't have proper lock coverage today */
-       BUG_ON(written == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT));
+       BUG_ON(written == -EIOCBQUEUED && !direct_io);
 
        /*
         * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
@@ -2463,7 +2463,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
        trace_generic_file_read_iter_ret(ret);
 
        /* buffered aio wouldn't have proper lock coverage today */
-       BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT));
+       BUG_ON(ret == -EIOCBQUEUED && !direct_io);
 
        /* see ocfs2_file_write_iter */
        if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
@@ -2527,24 +2527,79 @@ out:
        return offset;
 }
 
-static int ocfs2_file_clone_range(struct file *file_in,
-                                 loff_t pos_in,
-                                 struct file *file_out,
-                                 loff_t pos_out,
-                                 u64 len)
+static loff_t ocfs2_remap_file_range(struct file *file_in, loff_t pos_in,
+                                    struct file *file_out, loff_t pos_out,
+                                    loff_t len, unsigned int remap_flags)
 {
-       return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-                                        len, false);
-}
+       struct inode *inode_in = file_inode(file_in);
+       struct inode *inode_out = file_inode(file_out);
+       struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb);
+       struct buffer_head *in_bh = NULL, *out_bh = NULL;
+       bool same_inode = (inode_in == inode_out);
+       loff_t remapped = 0;
+       ssize_t ret;
 
-static int ocfs2_file_dedupe_range(struct file *file_in,
-                                  loff_t pos_in,
-                                  struct file *file_out,
-                                  loff_t pos_out,
-                                  u64 len)
-{
-       return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-                                         len, true);
+       if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+               return -EINVAL;
+       if (!ocfs2_refcount_tree(osb))
+               return -EOPNOTSUPP;
+       if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
+               return -EROFS;
+
+       /* Lock both files against IO */
+       ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh);
+       if (ret)
+               return ret;
+
+       /* Check file eligibility and prepare for block sharing. */
+       ret = -EINVAL;
+       if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) ||
+           (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
+               goto out_unlock;
+
+       ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
+                       &len, remap_flags);
+       if (ret < 0 || len == 0)
+               goto out_unlock;
+
+       /* Lock out changes to the allocation maps and remap. */
+       down_write(&OCFS2_I(inode_in)->ip_alloc_sem);
+       if (!same_inode)
+               down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem,
+                                 SINGLE_DEPTH_NESTING);
+
+       /* Zap any page cache for the destination file's range. */
+       truncate_inode_pages_range(&inode_out->i_data,
+                                  round_down(pos_out, PAGE_SIZE),
+                                  round_up(pos_out + len, PAGE_SIZE) - 1);
+
+       remapped = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in,
+                       inode_out, out_bh, pos_out, len);
+       up_write(&OCFS2_I(inode_in)->ip_alloc_sem);
+       if (!same_inode)
+               up_write(&OCFS2_I(inode_out)->ip_alloc_sem);
+       if (remapped < 0) {
+               ret = remapped;
+               mlog_errno(ret);
+               goto out_unlock;
+       }
+
+       /*
+        * Empty the extent map so that we may get the right extent
+        * record from the disk.
+        */
+       ocfs2_extent_map_trunc(inode_in, 0);
+       ocfs2_extent_map_trunc(inode_out, 0);
+
+       ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len);
+       if (ret) {
+               mlog_errno(ret);
+               goto out_unlock;
+       }
+
+out_unlock:
+       ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
+       return remapped > 0 ? remapped : ret;
 }
 
 const struct inode_operations ocfs2_file_iops = {
@@ -2586,8 +2641,7 @@ const struct file_operations ocfs2_fops = {
        .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
        .fallocate      = ocfs2_fallocate,
-       .clone_file_range = ocfs2_file_clone_range,
-       .dedupe_file_range = ocfs2_file_dedupe_range,
+       .remap_file_range = ocfs2_remap_file_range,
 };
 
 const struct file_operations ocfs2_dops = {
@@ -2633,8 +2687,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
        .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
        .fallocate      = ocfs2_fallocate,
-       .clone_file_range = ocfs2_file_clone_range,
-       .dedupe_file_range = ocfs2_file_dedupe_range,
+       .remap_file_range = ocfs2_remap_file_range,
 };
 
 const struct file_operations ocfs2_dops_no_plocks = {
index bd3475694e83a06501a055e73fd1403f81123eef..b63c97f4318e063889fe1ca203d19092c1abbedf 100644 (file)
@@ -1378,15 +1378,23 @@ static int __ocfs2_recovery_thread(void *arg)
        int rm_quota_used = 0, i;
        struct ocfs2_quota_recovery *qrec;
 
+       /* Whether the quota supported. */
+       int quota_enabled = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
+                       OCFS2_FEATURE_RO_COMPAT_USRQUOTA)
+               || OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
+                       OCFS2_FEATURE_RO_COMPAT_GRPQUOTA);
+
        status = ocfs2_wait_on_mount(osb);
        if (status < 0) {
                goto bail;
        }
 
-       rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS);
-       if (!rm_quota) {
-               status = -ENOMEM;
-               goto bail;
+       if (quota_enabled) {
+               rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS);
+               if (!rm_quota) {
+                       status = -ENOMEM;
+                       goto bail;
+               }
        }
 restart:
        status = ocfs2_super_lock(osb, 1);
@@ -1422,9 +1430,14 @@ restart:
                 * then quota usage would be out of sync until some node takes
                 * the slot. So we remember which nodes need quota recovery
                 * and when everything else is done, we recover quotas. */
-               for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++);
-               if (i == rm_quota_used)
-                       rm_quota[rm_quota_used++] = slot_num;
+               if (quota_enabled) {
+                       for (i = 0; i < rm_quota_used
+                                       && rm_quota[i] != slot_num; i++)
+                               ;
+
+                       if (i == rm_quota_used)
+                               rm_quota[rm_quota_used++] = slot_num;
+               }
 
                status = ocfs2_recover_node(osb, node_num, slot_num);
 skip_recovery:
@@ -1452,16 +1465,19 @@ skip_recovery:
        /* Now it is right time to recover quotas... We have to do this under
         * superblock lock so that no one can start using the slot (and crash)
         * before we recover it */
-       for (i = 0; i < rm_quota_used; i++) {
-               qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
-               if (IS_ERR(qrec)) {
-                       status = PTR_ERR(qrec);
-                       mlog_errno(status);
-                       continue;
+       if (quota_enabled) {
+               for (i = 0; i < rm_quota_used; i++) {
+                       qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
+                       if (IS_ERR(qrec)) {
+                               status = PTR_ERR(qrec);
+                               mlog_errno(status);
+                               continue;
+                       }
+                       ocfs2_queue_recovery_completion(osb->journal,
+                                       rm_quota[i],
+                                       NULL, NULL, qrec,
+                                       ORPHAN_NEED_TRUNCATE);
                }
-               ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
-                                               NULL, NULL, qrec,
-                                               ORPHAN_NEED_TRUNCATE);
        }
 
        ocfs2_super_unlock(osb, 1);
@@ -1483,7 +1499,8 @@ bail:
 
        mutex_unlock(&osb->recovery_lock);
 
-       kfree(rm_quota);
+       if (quota_enabled)
+               kfree(rm_quota);
 
        /* no one is callint kthread_stop() for us so the kthread() api
         * requires that we call do_exit().  And it isn't exported, but
index 7eb3b0a6347ef74990589ac47f5b4823441c8135..3f1685d7d43bf37a26161bfaeaaeefef3831820b 100644 (file)
@@ -25,6 +25,7 @@
 #include "ocfs2_ioctl.h"
 
 #include "alloc.h"
+#include "localalloc.h"
 #include "aops.h"
 #include "dlmglue.h"
 #include "extent_map.h"
@@ -233,6 +234,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
        struct ocfs2_refcount_tree *ref_tree = NULL;
        u32 new_phys_cpos, new_len;
        u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
+       int need_free = 0;
 
        if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) {
                BUG_ON(!ocfs2_is_refcount_inode(inode));
@@ -308,6 +310,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
                if (!partial) {
                        context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE;
                        ret = -ENOSPC;
+                       need_free = 1;
                        goto out_commit;
                }
        }
@@ -332,6 +335,20 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
                mlog_errno(ret);
 
 out_commit:
+       if (need_free && context->data_ac) {
+               struct ocfs2_alloc_context *data_ac = context->data_ac;
+
+               if (context->data_ac->ac_which == OCFS2_AC_USE_LOCAL)
+                       ocfs2_free_local_alloc_bits(osb, handle, data_ac,
+                                       new_phys_cpos, new_len);
+               else
+                       ocfs2_free_clusters(handle,
+                                       data_ac->ac_inode,
+                                       data_ac->ac_bh,
+                                       ocfs2_clusters_to_blocks(osb->sb, new_phys_cpos),
+                                       new_len);
+       }
+
        ocfs2_commit_trans(osb, handle);
 
 out_unlock_mutex:
index 1114ef02e7803f0594ea6744fa8a39ffe0de68c0..a35259eebc56739b59bf7ffb5029e647dc11ad0f 100644 (file)
@@ -4466,9 +4466,9 @@ out:
 }
 
 /* Update destination inode size, if necessary. */
-static int ocfs2_reflink_update_dest(struct inode *dest,
-                                    struct buffer_head *d_bh,
-                                    loff_t newlen)
+int ocfs2_reflink_update_dest(struct inode *dest,
+                             struct buffer_head *d_bh,
+                             loff_t newlen)
 {
        handle_t *handle;
        int ret;
@@ -4505,14 +4505,14 @@ out_commit:
 }
 
 /* Remap the range pos_in:len in s_inode to pos_out:len in t_inode. */
-static int ocfs2_reflink_remap_extent(struct inode *s_inode,
-                                     struct buffer_head *s_bh,
-                                     loff_t pos_in,
-                                     struct inode *t_inode,
-                                     struct buffer_head *t_bh,
-                                     loff_t pos_out,
-                                     loff_t len,
-                                     struct ocfs2_cached_dealloc_ctxt *dealloc)
+static loff_t ocfs2_reflink_remap_extent(struct inode *s_inode,
+                                        struct buffer_head *s_bh,
+                                        loff_t pos_in,
+                                        struct inode *t_inode,
+                                        struct buffer_head *t_bh,
+                                        loff_t pos_out,
+                                        loff_t len,
+                                        struct ocfs2_cached_dealloc_ctxt *dealloc)
 {
        struct ocfs2_extent_tree s_et;
        struct ocfs2_extent_tree t_et;
@@ -4520,8 +4520,9 @@ static int ocfs2_reflink_remap_extent(struct inode *s_inode,
        struct buffer_head *ref_root_bh = NULL;
        struct ocfs2_refcount_tree *ref_tree;
        struct ocfs2_super *osb;
+       loff_t remapped_bytes = 0;
        loff_t pstart, plen;
-       u32 p_cluster, num_clusters, slast, spos, tpos;
+       u32 p_cluster, num_clusters, slast, spos, tpos, remapped_clus = 0;
        unsigned int ext_flags;
        int ret = 0;
 
@@ -4603,30 +4604,34 @@ static int ocfs2_reflink_remap_extent(struct inode *s_inode,
 next_loop:
                spos += num_clusters;
                tpos += num_clusters;
+               remapped_clus += num_clusters;
        }
 
-out:
-       return ret;
+       goto out;
 out_unlock_refcount:
        ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
        brelse(ref_root_bh);
-       return ret;
+out:
+       remapped_bytes = ocfs2_clusters_to_bytes(t_inode->i_sb, remapped_clus);
+       remapped_bytes = min_t(loff_t, len, remapped_bytes);
+
+       return remapped_bytes > 0 ? remapped_bytes : ret;
 }
 
 /* Set up refcount tree and remap s_inode to t_inode. */
-static int ocfs2_reflink_remap_blocks(struct inode *s_inode,
-                                     struct buffer_head *s_bh,
-                                     loff_t pos_in,
-                                     struct inode *t_inode,
-                                     struct buffer_head *t_bh,
-                                     loff_t pos_out,
-                                     loff_t len)
+loff_t ocfs2_reflink_remap_blocks(struct inode *s_inode,
+                                 struct buffer_head *s_bh,
+                                 loff_t pos_in,
+                                 struct inode *t_inode,
+                                 struct buffer_head *t_bh,
+                                 loff_t pos_out,
+                                 loff_t len)
 {
        struct ocfs2_cached_dealloc_ctxt dealloc;
        struct ocfs2_super *osb;
        struct ocfs2_dinode *dis;
        struct ocfs2_dinode *dit;
-       int ret;
+       loff_t ret;
 
        osb = OCFS2_SB(s_inode->i_sb);
        dis = (struct ocfs2_dinode *)s_bh->b_data;
@@ -4698,7 +4703,7 @@ static int ocfs2_reflink_remap_blocks(struct inode *s_inode,
        /* Actually remap extents now. */
        ret = ocfs2_reflink_remap_extent(s_inode, s_bh, pos_in, t_inode, t_bh,
                                         pos_out, len, &dealloc);
-       if (ret) {
+       if (ret < 0) {
                mlog_errno(ret);
                goto out;
        }
@@ -4713,10 +4718,10 @@ out:
 }
 
 /* Lock an inode and grab a bh pointing to the inode. */
-static int ocfs2_reflink_inodes_lock(struct inode *s_inode,
-                                    struct buffer_head **bh1,
-                                    struct inode *t_inode,
-                                    struct buffer_head **bh2)
+int ocfs2_reflink_inodes_lock(struct inode *s_inode,
+                             struct buffer_head **bh1,
+                             struct inode *t_inode,
+                             struct buffer_head **bh2)
 {
        struct inode *inode1;
        struct inode *inode2;
@@ -4801,10 +4806,10 @@ out_i1:
 }
 
 /* Unlock both inodes and release buffers. */
-static void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
-                                       struct buffer_head *s_bh,
-                                       struct inode *t_inode,
-                                       struct buffer_head *t_bh)
+void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
+                                struct buffer_head *s_bh,
+                                struct inode *t_inode,
+                                struct buffer_head *t_bh)
 {
        ocfs2_inode_unlock(s_inode, 1);
        ocfs2_rw_unlock(s_inode, 1);
@@ -4816,82 +4821,3 @@ static void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
        }
        unlock_two_nondirectories(s_inode, t_inode);
 }
-
-/* Link a range of blocks from one file to another. */
-int ocfs2_reflink_remap_range(struct file *file_in,
-                             loff_t pos_in,
-                             struct file *file_out,
-                             loff_t pos_out,
-                             u64 len,
-                             bool is_dedupe)
-{
-       struct inode *inode_in = file_inode(file_in);
-       struct inode *inode_out = file_inode(file_out);
-       struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb);
-       struct buffer_head *in_bh = NULL, *out_bh = NULL;
-       bool same_inode = (inode_in == inode_out);
-       ssize_t ret;
-
-       if (!ocfs2_refcount_tree(osb))
-               return -EOPNOTSUPP;
-       if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
-               return -EROFS;
-
-       /* Lock both files against IO */
-       ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh);
-       if (ret)
-               return ret;
-
-       /* Check file eligibility and prepare for block sharing. */
-       ret = -EINVAL;
-       if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) ||
-           (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
-               goto out_unlock;
-
-       ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
-                       &len, is_dedupe);
-       if (ret <= 0)
-               goto out_unlock;
-
-       /* Lock out changes to the allocation maps and remap. */
-       down_write(&OCFS2_I(inode_in)->ip_alloc_sem);
-       if (!same_inode)
-               down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem,
-                                 SINGLE_DEPTH_NESTING);
-
-       ret = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in, inode_out,
-                                        out_bh, pos_out, len);
-
-       /* Zap any page cache for the destination file's range. */
-       if (!ret)
-               truncate_inode_pages_range(&inode_out->i_data, pos_out,
-                                          PAGE_ALIGN(pos_out + len) - 1);
-
-       up_write(&OCFS2_I(inode_in)->ip_alloc_sem);
-       if (!same_inode)
-               up_write(&OCFS2_I(inode_out)->ip_alloc_sem);
-       if (ret) {
-               mlog_errno(ret);
-               goto out_unlock;
-       }
-
-       /*
-        * Empty the extent map so that we may get the right extent
-        * record from the disk.
-        */
-       ocfs2_extent_map_trunc(inode_in, 0);
-       ocfs2_extent_map_trunc(inode_out, 0);
-
-       ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len);
-       if (ret) {
-               mlog_errno(ret);
-               goto out_unlock;
-       }
-
-       ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
-       return 0;
-
-out_unlock:
-       ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
-       return ret;
-}
index 4af55bf4b35b977355fd1df6826e1ddf493f9736..e9e862be4a1e51cb55fe129d996bf08731d8c220 100644 (file)
@@ -115,11 +115,23 @@ int ocfs2_reflink_ioctl(struct inode *inode,
                        const char __user *oldname,
                        const char __user *newname,
                        bool preserve);
-int ocfs2_reflink_remap_range(struct file *file_in,
-                             loff_t pos_in,
-                             struct file *file_out,
-                             loff_t pos_out,
-                             u64 len,
-                             bool is_dedupe);
+loff_t ocfs2_reflink_remap_blocks(struct inode *s_inode,
+                                 struct buffer_head *s_bh,
+                                 loff_t pos_in,
+                                 struct inode *t_inode,
+                                 struct buffer_head *t_bh,
+                                 loff_t pos_out,
+                                 loff_t len);
+int ocfs2_reflink_inodes_lock(struct inode *s_inode,
+                             struct buffer_head **bh1,
+                             struct inode *t_inode,
+                             struct buffer_head **bh2);
+void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
+                                struct buffer_head *s_bh,
+                                struct inode *t_inode,
+                                struct buffer_head *t_bh);
+int ocfs2_reflink_update_dest(struct inode *dest,
+                             struct buffer_head *d_bh,
+                             loff_t newlen);
 
 #endif /* OCFS2_REFCOUNTTREE_H */
index d6c350ba25b96ec9886cdc11b46a94fb17769261..c4b029c43464e0d14424a8a9af216d9168ca4bc9 100644 (file)
@@ -48,12 +48,6 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
  */
 static struct ocfs2_stack_plugin *active_stack;
 
-inline int ocfs2_is_o2cb_active(void)
-{
-       return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB);
-}
-EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active);
-
 static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name)
 {
        struct ocfs2_stack_plugin *p;
index e3036e1790e86da7b4e13dcb0b8c88e3f19b6d50..f2dce10fae543c254dcb4e6628d357b60a3ac16c 100644 (file)
@@ -298,9 +298,6 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p
 int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
 void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
 
-/* In ocfs2_downconvert_lock(), we need to know which stack we are using */
-int ocfs2_is_o2cb_active(void);
-
 extern struct kset *ocfs2_kset;
 
 #endif  /* STACKGLUE_H */
index 5e65d818937bb1f03ba2f964780a5fd5bfb1f880..fe53381b26b1841c3cae47dbe630d400e6021af8 100644 (file)
@@ -25,7 +25,7 @@ static int read_one_page(struct page *page)
        struct iov_iter to;
        struct bio_vec bv = {.bv_page = page, .bv_len = PAGE_SIZE};
 
-       iov_iter_bvec(&to, ITER_BVEC | READ, &bv, 1, PAGE_SIZE);
+       iov_iter_bvec(&to, READ, &bv, 1, PAGE_SIZE);
 
        gossip_debug(GOSSIP_INODE_DEBUG,
                    "orangefs_readpage called with page %p\n",
index 1cc797a08a5b5f7eb6c002862c834177e5d6f93c..9e62dcf06fc4a911dd777d2c7ff0a992190b5a0f 100644 (file)
@@ -125,6 +125,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
        struct file *new_file;
        loff_t old_pos = 0;
        loff_t new_pos = 0;
+       loff_t cloned;
        int error = 0;
 
        if (len == 0)
@@ -141,11 +142,10 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
        }
 
        /* Try to use clone_file_range to clone up within the same fs */
-       error = do_clone_file_range(old_file, 0, new_file, 0, len);
-       if (!error)
+       cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
+       if (cloned == len)
                goto out;
        /* Couldn't clone, so now we try to copy the data */
-       error = 0;
 
        /* FIXME: copy up sparse files efficiently */
        while (len) {
@@ -395,7 +395,6 @@ struct ovl_copy_up_ctx {
        struct dentry *destdir;
        struct qstr destname;
        struct dentry *workdir;
-       bool tmpfile;
        bool origin;
        bool indexed;
        bool metacopy;
@@ -440,63 +439,6 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
        return err;
 }
 
-static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
-                           struct dentry **newdentry)
-{
-       int err;
-       struct dentry *upper;
-       struct inode *udir = d_inode(c->destdir);
-
-       upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
-       if (IS_ERR(upper))
-               return PTR_ERR(upper);
-
-       if (c->tmpfile)
-               err = ovl_do_link(temp, udir, upper);
-       else
-               err = ovl_do_rename(d_inode(c->workdir), temp, udir, upper, 0);
-
-       if (!err)
-               *newdentry = dget(c->tmpfile ? upper : temp);
-       dput(upper);
-
-       return err;
-}
-
-static struct dentry *ovl_get_tmpfile(struct ovl_copy_up_ctx *c)
-{
-       int err;
-       struct dentry *temp;
-       const struct cred *old_creds = NULL;
-       struct cred *new_creds = NULL;
-       struct ovl_cattr cattr = {
-               /* Can't properly set mode on creation because of the umask */
-               .mode = c->stat.mode & S_IFMT,
-               .rdev = c->stat.rdev,
-               .link = c->link
-       };
-
-       err = security_inode_copy_up(c->dentry, &new_creds);
-       temp = ERR_PTR(err);
-       if (err < 0)
-               goto out;
-
-       if (new_creds)
-               old_creds = override_creds(new_creds);
-
-       if (c->tmpfile)
-               temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
-       else
-               temp = ovl_create_temp(c->workdir, &cattr);
-out:
-       if (new_creds) {
-               revert_creds(old_creds);
-               put_cred(new_creds);
-       }
-
-       return temp;
-}
-
 static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
 {
        int err;
@@ -548,51 +490,148 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
        return err;
 }
 
-static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
+struct ovl_cu_creds {
+       const struct cred *old;
+       struct cred *new;
+};
+
+static int ovl_prep_cu_creds(struct dentry *dentry, struct ovl_cu_creds *cc)
+{
+       int err;
+
+       cc->old = cc->new = NULL;
+       err = security_inode_copy_up(dentry, &cc->new);
+       if (err < 0)
+               return err;
+
+       if (cc->new)
+               cc->old = override_creds(cc->new);
+
+       return 0;
+}
+
+static void ovl_revert_cu_creds(struct ovl_cu_creds *cc)
+{
+       if (cc->new) {
+               revert_creds(cc->old);
+               put_cred(cc->new);
+       }
+}
+
+/*
+ * Copyup using workdir to prepare temp file.  Used when copying up directories,
+ * special files or when upper fs doesn't support O_TMPFILE.
+ */
+static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
 {
-       struct inode *udir = c->destdir->d_inode;
        struct inode *inode;
-       struct dentry *newdentry = NULL;
-       struct dentry *temp;
+       struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
+       struct dentry *temp, *upper;
+       struct ovl_cu_creds cc;
        int err;
+       struct ovl_cattr cattr = {
+               /* Can't properly set mode on creation because of the umask */
+               .mode = c->stat.mode & S_IFMT,
+               .rdev = c->stat.rdev,
+               .link = c->link
+       };
+
+       err = ovl_lock_rename_workdir(c->workdir, c->destdir);
+       if (err)
+               return err;
+
+       err = ovl_prep_cu_creds(c->dentry, &cc);
+       if (err)
+               goto unlock;
 
-       temp = ovl_get_tmpfile(c);
+       temp = ovl_create_temp(c->workdir, &cattr);
+       ovl_revert_cu_creds(&cc);
+
+       err = PTR_ERR(temp);
        if (IS_ERR(temp))
-               return PTR_ERR(temp);
+               goto unlock;
 
        err = ovl_copy_up_inode(c, temp);
        if (err)
-               goto out;
+               goto cleanup;
 
        if (S_ISDIR(c->stat.mode) && c->indexed) {
                err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
                if (err)
-                       goto out;
+                       goto cleanup;
        }
 
-       if (c->tmpfile) {
-               inode_lock_nested(udir, I_MUTEX_PARENT);
-               err = ovl_install_temp(c, temp, &newdentry);
-               inode_unlock(udir);
-       } else {
-               err = ovl_install_temp(c, temp, &newdentry);
-       }
+       upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
+       err = PTR_ERR(upper);
+       if (IS_ERR(upper))
+               goto cleanup;
+
+       err = ovl_do_rename(wdir, temp, udir, upper, 0);
+       dput(upper);
        if (err)
-               goto out;
+               goto cleanup;
 
        if (!c->metacopy)
                ovl_set_upperdata(d_inode(c->dentry));
        inode = d_inode(c->dentry);
-       ovl_inode_update(inode, newdentry);
+       ovl_inode_update(inode, temp);
        if (S_ISDIR(inode->i_mode))
                ovl_set_flag(OVL_WHITEOUTS, inode);
+unlock:
+       unlock_rename(c->workdir, c->destdir);
 
-out:
-       if (err && !c->tmpfile)
-               ovl_cleanup(d_inode(c->workdir), temp);
-       dput(temp);
        return err;
 
+cleanup:
+       ovl_cleanup(wdir, temp);
+       dput(temp);
+       goto unlock;
+}
+
+/* Copyup using O_TMPFILE which does not require cross dir locking */
+static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
+{
+       struct inode *udir = d_inode(c->destdir);
+       struct dentry *temp, *upper;
+       struct ovl_cu_creds cc;
+       int err;
+
+       err = ovl_prep_cu_creds(c->dentry, &cc);
+       if (err)
+               return err;
+
+       temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
+       ovl_revert_cu_creds(&cc);
+
+       if (IS_ERR(temp))
+               return PTR_ERR(temp);
+
+       err = ovl_copy_up_inode(c, temp);
+       if (err)
+               goto out_dput;
+
+       inode_lock_nested(udir, I_MUTEX_PARENT);
+
+       upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
+       err = PTR_ERR(upper);
+       if (!IS_ERR(upper)) {
+               err = ovl_do_link(temp, udir, upper);
+               dput(upper);
+       }
+       inode_unlock(udir);
+
+       if (err)
+               goto out_dput;
+
+       if (!c->metacopy)
+               ovl_set_upperdata(d_inode(c->dentry));
+       ovl_inode_update(d_inode(c->dentry), temp);
+
+       return 0;
+
+out_dput:
+       dput(temp);
+       return err;
 }
 
 /*
@@ -646,18 +685,10 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
        }
 
        /* Should we copyup with O_TMPFILE or with workdir? */
-       if (S_ISREG(c->stat.mode) && ofs->tmpfile) {
-               c->tmpfile = true;
-               err = ovl_copy_up_locked(c);
-       } else {
-               err = ovl_lock_rename_workdir(c->workdir, c->destdir);
-               if (!err) {
-                       err = ovl_copy_up_locked(c);
-                       unlock_rename(c->workdir, c->destdir);
-               }
-       }
-
-
+       if (S_ISREG(c->stat.mode) && ofs->tmpfile)
+               err = ovl_copy_up_tmpfile(c);
+       else
+               err = ovl_copy_up_workdir(c);
        if (err)
                goto out;
 
index 276914ae3c60aaf297747f03b58d4582df335ec9..c6289147c7871f165b70f5ca8e13668f3010d4b0 100644 (file)
@@ -414,13 +414,12 @@ static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
        if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl)
                return 0;
 
-       size = posix_acl_to_xattr(NULL, acl, NULL, 0);
+       size = posix_acl_xattr_size(acl->a_count);
        buffer = kmalloc(size, GFP_KERNEL);
        if (!buffer)
                return -ENOMEM;
 
-       size = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
-       err = size;
+       err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
        if (err < 0)
                goto out_free;
 
@@ -463,6 +462,10 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
        if (IS_ERR(upper))
                goto out_unlock;
 
+       err = -ESTALE;
+       if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper)))
+               goto out_dput;
+
        newdentry = ovl_create_temp(workdir, cattr);
        err = PTR_ERR(newdentry);
        if (IS_ERR(newdentry))
@@ -652,7 +655,6 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
                    struct dentry *new)
 {
        int err;
-       bool locked = false;
        struct inode *inode;
 
        err = ovl_want_write(old);
@@ -663,13 +665,17 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
        if (err)
                goto out_drop_write;
 
+       err = ovl_copy_up(new->d_parent);
+       if (err)
+               goto out_drop_write;
+
        if (ovl_is_metacopy_dentry(old)) {
                err = ovl_set_redirect(old, false);
                if (err)
                        goto out_drop_write;
        }
 
-       err = ovl_nlink_start(old, &locked);
+       err = ovl_nlink_start(old);
        if (err)
                goto out_drop_write;
 
@@ -682,7 +688,7 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
        if (err)
                iput(inode);
 
-       ovl_nlink_end(old, locked);
+       ovl_nlink_end(old);
 out_drop_write:
        ovl_drop_write(old);
 out:
@@ -807,7 +813,6 @@ static bool ovl_pure_upper(struct dentry *dentry)
 static int ovl_do_remove(struct dentry *dentry, bool is_dir)
 {
        int err;
-       bool locked = false;
        const struct cred *old_cred;
        struct dentry *upperdentry;
        bool lower_positive = ovl_lower_positive(dentry);
@@ -828,7 +833,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
        if (err)
                goto out_drop_write;
 
-       err = ovl_nlink_start(dentry, &locked);
+       err = ovl_nlink_start(dentry);
        if (err)
                goto out_drop_write;
 
@@ -844,7 +849,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
                else
                        drop_nlink(dentry->d_inode);
        }
-       ovl_nlink_end(dentry, locked);
+       ovl_nlink_end(dentry);
 
        /*
         * Copy ctime
@@ -1008,7 +1013,6 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
                      unsigned int flags)
 {
        int err;
-       bool locked = false;
        struct dentry *old_upperdir;
        struct dentry *new_upperdir;
        struct dentry *olddentry;
@@ -1017,6 +1021,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
        bool old_opaque;
        bool new_opaque;
        bool cleanup_whiteout = false;
+       bool update_nlink = false;
        bool overwrite = !(flags & RENAME_EXCHANGE);
        bool is_dir = d_is_dir(old);
        bool new_is_dir = d_is_dir(new);
@@ -1074,10 +1079,12 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
                err = ovl_copy_up(new);
                if (err)
                        goto out_drop_write;
-       } else {
-               err = ovl_nlink_start(new, &locked);
+       } else if (d_inode(new)) {
+               err = ovl_nlink_start(new);
                if (err)
                        goto out_drop_write;
+
+               update_nlink = true;
        }
 
        old_cred = ovl_override_creds(old->d_sb);
@@ -1206,7 +1213,8 @@ out_unlock:
        unlock_rename(new_upperdir, old_upperdir);
 out_revert_creds:
        revert_creds(old_cred);
-       ovl_nlink_end(new, locked);
+       if (update_nlink)
+               ovl_nlink_end(new);
 out_drop_write:
        ovl_drop_write(old);
 out:
index 986313da0c8895352d2216f0fb0b78d3854064fb..84dd957efa24a17e8a66416117790896fd1d04ff 100644 (file)
@@ -434,14 +434,14 @@ enum ovl_copyop {
        OVL_DEDUPE,
 };
 
-static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in,
+static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
                            struct file *file_out, loff_t pos_out,
-                           u64 len, unsigned int flags, enum ovl_copyop op)
+                           loff_t len, unsigned int flags, enum ovl_copyop op)
 {
        struct inode *inode_out = file_inode(file_out);
        struct fd real_in, real_out;
        const struct cred *old_cred;
-       ssize_t ret;
+       loff_t ret;
 
        ret = ovl_real_fdget(file_out, &real_out);
        if (ret)
@@ -462,12 +462,13 @@ static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in,
 
        case OVL_CLONE:
                ret = vfs_clone_file_range(real_in.file, pos_in,
-                                          real_out.file, pos_out, len);
+                                          real_out.file, pos_out, len, flags);
                break;
 
        case OVL_DEDUPE:
                ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
-                                               real_out.file, pos_out, len);
+                                               real_out.file, pos_out, len,
+                                               flags);
                break;
        }
        revert_creds(old_cred);
@@ -489,26 +490,31 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
                            OVL_COPY);
 }
 
-static int ovl_clone_file_range(struct file *file_in, loff_t pos_in,
-                               struct file *file_out, loff_t pos_out, u64 len)
+static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
+                                  struct file *file_out, loff_t pos_out,
+                                  loff_t len, unsigned int remap_flags)
 {
-       return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
-                           OVL_CLONE);
-}
+       enum ovl_copyop op;
+
+       if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+               return -EINVAL;
+
+       if (remap_flags & REMAP_FILE_DEDUP)
+               op = OVL_DEDUPE;
+       else
+               op = OVL_CLONE;
 
-static int ovl_dedupe_file_range(struct file *file_in, loff_t pos_in,
-                                struct file *file_out, loff_t pos_out, u64 len)
-{
        /*
         * Don't copy up because of a dedupe request, this wouldn't make sense
         * most of the time (data would be duplicated instead of deduplicated).
         */
-       if (!ovl_inode_upper(file_inode(file_in)) ||
-           !ovl_inode_upper(file_inode(file_out)))
+       if (op == OVL_DEDUPE &&
+           (!ovl_inode_upper(file_inode(file_in)) ||
+            !ovl_inode_upper(file_inode(file_out))))
                return -EPERM;
 
-       return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
-                           OVL_DEDUPE);
+       return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
+                           remap_flags, op);
 }
 
 const struct file_operations ovl_file_operations = {
@@ -525,6 +531,5 @@ const struct file_operations ovl_file_operations = {
        .compat_ioctl   = ovl_compat_ioctl,
 
        .copy_file_range        = ovl_copy_file_range,
-       .clone_file_range       = ovl_clone_file_range,
-       .dedupe_file_range      = ovl_dedupe_file_range,
+       .remap_file_range       = ovl_remap_file_range,
 };
index 3b7ed5d2279c6a8efde8180471bde94ef1020964..6bcc9dedc342cc7cf141abbc5220f4a0aa5ce1da 100644 (file)
@@ -286,13 +286,22 @@ int ovl_permission(struct inode *inode, int mask)
        if (err)
                return err;
 
-       old_cred = ovl_override_creds(inode->i_sb);
-       if (!upperinode &&
-           !special_file(realinode->i_mode) && mask & MAY_WRITE) {
+       /* No need to do any access on underlying for special files */
+       if (special_file(realinode->i_mode))
+               return 0;
+
+       /* No need to access underlying for execute */
+       mask &= ~MAY_EXEC;
+       if ((mask & (MAY_READ | MAY_WRITE)) == 0)
+               return 0;
+
+       /* Lower files get copied up, so turn write access into read */
+       if (!upperinode && mask & MAY_WRITE) {
                mask &= ~(MAY_WRITE | MAY_APPEND);
-               /* Make sure mounter can read file for copy up later */
                mask |= MAY_READ;
        }
+
+       old_cred = ovl_override_creds(inode->i_sb);
        err = inode_permission(realinode, mask);
        revert_creds(old_cred);
 
index 9c0ca6a7becfbe56e15efd596fbc6540b4bbd859..efd372312ef1000709827eab0291b760ea224ae1 100644 (file)
@@ -422,8 +422,10 @@ int ovl_verify_set_fh(struct dentry *dentry, const char *name,
 
        fh = ovl_encode_real_fh(real, is_upper);
        err = PTR_ERR(fh);
-       if (IS_ERR(fh))
+       if (IS_ERR(fh)) {
+               fh = NULL;
                goto fail;
+       }
 
        err = ovl_verify_fh(dentry, name, fh);
        if (set && err == -ENODATA)
index a3c0d95843121e92a103a6b07628feb853c31399..5e45cb3630a06f37059d044412c3c089d84e28fe 100644 (file)
@@ -271,8 +271,8 @@ bool ovl_test_flag(unsigned long flag, struct inode *inode);
 bool ovl_inuse_trylock(struct dentry *dentry);
 void ovl_inuse_unlock(struct dentry *dentry);
 bool ovl_need_index(struct dentry *dentry);
-int ovl_nlink_start(struct dentry *dentry, bool *locked);
-void ovl_nlink_end(struct dentry *dentry, bool locked);
+int ovl_nlink_start(struct dentry *dentry);
+void ovl_nlink_end(struct dentry *dentry);
 int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
 int ovl_check_metacopy_xattr(struct dentry *dentry);
 bool ovl_is_metacopy_dentry(struct dentry *dentry);
@@ -290,6 +290,16 @@ static inline unsigned int ovl_xino_bits(struct super_block *sb)
        return ofs->xino_bits;
 }
 
+static inline int ovl_inode_lock(struct inode *inode)
+{
+       return mutex_lock_interruptible(&OVL_I(inode)->lock);
+}
+
+static inline void ovl_inode_unlock(struct inode *inode)
+{
+       mutex_unlock(&OVL_I(inode)->lock);
+}
+
 
 /* namei.c */
 int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
index 30adc9d408a0df84455b86811ee468faf439c0c1..0116735cc32147ca3972275e2baa8a52d296f63f 100644 (file)
@@ -472,6 +472,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 {
        char *p;
        int err;
+       bool metacopy_opt = false, redirect_opt = false;
 
        config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
        if (!config->redirect_mode)
@@ -516,6 +517,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
                        config->redirect_mode = match_strdup(&args[0]);
                        if (!config->redirect_mode)
                                return -ENOMEM;
+                       redirect_opt = true;
                        break;
 
                case OPT_INDEX_ON:
@@ -548,6 +550,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 
                case OPT_METACOPY_ON:
                        config->metacopy = true;
+                       metacopy_opt = true;
                        break;
 
                case OPT_METACOPY_OFF:
@@ -572,13 +575,32 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
        if (err)
                return err;
 
-       /* metacopy feature with upper requires redirect_dir=on */
-       if (config->upperdir && config->metacopy && !config->redirect_dir) {
-               pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=on\", falling back to metacopy=off.\n");
-               config->metacopy = false;
-       } else if (config->metacopy && !config->redirect_follow) {
-               pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=follow\" on non-upper mount, falling back to metacopy=off.\n");
-               config->metacopy = false;
+       /*
+        * This is to make the logic below simpler.  It doesn't make any other
+        * difference, since config->redirect_dir is only used for upper.
+        */
+       if (!config->upperdir && config->redirect_follow)
+               config->redirect_dir = true;
+
+       /* Resolve metacopy -> redirect_dir dependency */
+       if (config->metacopy && !config->redirect_dir) {
+               if (metacopy_opt && redirect_opt) {
+                       pr_err("overlayfs: conflicting options: metacopy=on,redirect_dir=%s\n",
+                              config->redirect_mode);
+                       return -EINVAL;
+               }
+               if (redirect_opt) {
+                       /*
+                        * There was an explicit redirect_dir=... that resulted
+                        * in this conflict.
+                        */
+                       pr_info("overlayfs: disabling metacopy due to redirect_dir=%s\n",
+                               config->redirect_mode);
+                       config->metacopy = false;
+               } else {
+                       /* Automatically enable redirect otherwise. */
+                       config->redirect_follow = config->redirect_dir = true;
+               }
        }
 
        return 0;
@@ -1175,9 +1197,29 @@ out:
        return err;
 }
 
+static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
+{
+       unsigned int i;
+
+       if (!ofs->config.nfs_export && !(ofs->config.index && ofs->upper_mnt))
+               return true;
+
+       for (i = 0; i < ofs->numlowerfs; i++) {
+               /*
+                * We use uuid to associate an overlay lower file handle with a
+                * lower layer, so we can accept lower fs with null uuid as long
+                * as all lower layers with null uuid are on the same fs.
+                */
+               if (uuid_equal(&ofs->lower_fs[i].sb->s_uuid, uuid))
+                       return false;
+       }
+       return true;
+}
+
 /* Get a unique fsid for the layer */
-static int ovl_get_fsid(struct ovl_fs *ofs, struct super_block *sb)
+static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
 {
+       struct super_block *sb = path->mnt->mnt_sb;
        unsigned int i;
        dev_t dev;
        int err;
@@ -1191,6 +1233,14 @@ static int ovl_get_fsid(struct ovl_fs *ofs, struct super_block *sb)
                        return i + 1;
        }
 
+       if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
+               ofs->config.index = false;
+               ofs->config.nfs_export = false;
+               pr_warn("overlayfs: %s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
+                       uuid_is_null(&sb->s_uuid) ? "null" : "conflicting",
+                       path->dentry);
+       }
+
        err = get_anon_bdev(&dev);
        if (err) {
                pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
@@ -1225,7 +1275,7 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
                struct vfsmount *mnt;
                int fsid;
 
-               err = fsid = ovl_get_fsid(ofs, stack[i].mnt->mnt_sb);
+               err = fsid = ovl_get_fsid(ofs, &stack[i]);
                if (err < 0)
                        goto out;
 
index ace4fe4c39a9307aa6008702f0195a92af74627c..7c01327b1852053c58feff725c3d5aac9e6b9717 100644 (file)
@@ -65,8 +65,7 @@ struct super_block *ovl_same_sb(struct super_block *sb)
  */
 int ovl_can_decode_fh(struct super_block *sb)
 {
-       if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry ||
-           uuid_is_null(&sb->s_uuid))
+       if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry)
                return 0;
 
        return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN;
@@ -522,13 +521,13 @@ bool ovl_already_copied_up(struct dentry *dentry, int flags)
 
 int ovl_copy_up_start(struct dentry *dentry, int flags)
 {
-       struct ovl_inode *oi = OVL_I(d_inode(dentry));
+       struct inode *inode = d_inode(dentry);
        int err;
 
-       err = mutex_lock_interruptible(&oi->lock);
+       err = ovl_inode_lock(inode);
        if (!err && ovl_already_copied_up_locked(dentry, flags)) {
                err = 1; /* Already copied up */
-               mutex_unlock(&oi->lock);
+               ovl_inode_unlock(inode);
        }
 
        return err;
@@ -536,7 +535,7 @@ int ovl_copy_up_start(struct dentry *dentry, int flags)
 
 void ovl_copy_up_end(struct dentry *dentry)
 {
-       mutex_unlock(&OVL_I(d_inode(dentry))->lock);
+       ovl_inode_unlock(d_inode(dentry));
 }
 
 bool ovl_check_origin_xattr(struct dentry *dentry)
@@ -739,14 +738,14 @@ fail:
  * Operations that change overlay inode and upper inode nlink need to be
  * synchronized with copy up for persistent nlink accounting.
  */
-int ovl_nlink_start(struct dentry *dentry, bool *locked)
+int ovl_nlink_start(struct dentry *dentry)
 {
-       struct ovl_inode *oi = OVL_I(d_inode(dentry));
+       struct inode *inode = d_inode(dentry);
        const struct cred *old_cred;
        int err;
 
-       if (!d_inode(dentry))
-               return 0;
+       if (WARN_ON(!inode))
+               return -ENOENT;
 
        /*
         * With inodes index is enabled, we store the union overlay nlink
@@ -768,11 +767,11 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked)
                        return err;
        }
 
-       err = mutex_lock_interruptible(&oi->lock);
+       err = ovl_inode_lock(inode);
        if (err)
                return err;
 
-       if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+       if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, inode))
                goto out;
 
        old_cred = ovl_override_creds(dentry->d_sb);
@@ -787,27 +786,24 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked)
 
 out:
        if (err)
-               mutex_unlock(&oi->lock);
-       else
-               *locked = true;
+               ovl_inode_unlock(inode);
 
        return err;
 }
 
-void ovl_nlink_end(struct dentry *dentry, bool locked)
+void ovl_nlink_end(struct dentry *dentry)
 {
-       if (locked) {
-               if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) &&
-                   d_inode(dentry)->i_nlink == 0) {
-                       const struct cred *old_cred;
+       struct inode *inode = d_inode(dentry);
 
-                       old_cred = ovl_override_creds(dentry->d_sb);
-                       ovl_cleanup_index(dentry);
-                       revert_creds(old_cred);
-               }
+       if (ovl_test_flag(OVL_INDEX, inode) && inode->i_nlink == 0) {
+               const struct cred *old_cred;
 
-               mutex_unlock(&OVL_I(d_inode(dentry))->lock);
+               old_cred = ovl_override_creds(dentry->d_sb);
+               ovl_cleanup_index(dentry);
+               revert_creds(old_cred);
        }
+
+       ovl_inode_unlock(inode);
 }
 
 int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
index 7e9f07bf260d20bb0a0cd4cd6b6b4abe82b23e20..ce34654794472d0a7b8c2574340c18cc7d594f7a 100644 (file)
@@ -2905,6 +2905,21 @@ static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns,
 }
 #endif /* CONFIG_LIVEPATCH */
 
+#ifdef CONFIG_STACKLEAK_METRICS
+static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns,
+                               struct pid *pid, struct task_struct *task)
+{
+       unsigned long prev_depth = THREAD_SIZE -
+                               (task->prev_lowest_stack & (THREAD_SIZE - 1));
+       unsigned long depth = THREAD_SIZE -
+                               (task->lowest_stack & (THREAD_SIZE - 1));
+
+       seq_printf(m, "previous stack depth: %lu\nstack depth: %lu\n",
+                                                       prev_depth, depth);
+       return 0;
+}
+#endif /* CONFIG_STACKLEAK_METRICS */
+
 /*
  * Thread groups
  */
@@ -3006,6 +3021,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_LIVEPATCH
        ONE("patch_state",  S_IRUSR, proc_pid_patch_state),
 #endif
+#ifdef CONFIG_STACKLEAK_METRICS
+       ONE("stack_depth", S_IRUGO, proc_stack_depth),
+#endif
 };
 
 static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
index 603794b207ebad39946e96bcbfff73332fc9a0a0..bfcb4ced5664c00f2fab706ba483094a4bc1ca5a 100644 (file)
@@ -1407,7 +1407,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
                goto fput_in;
        if (!(out.file->f_mode & FMODE_WRITE))
                goto fput_out;
-       retval = -EINVAL;
        in_inode = file_inode(in.file);
        out_inode = file_inode(out.file);
        out_pos = out.file->f_pos;
@@ -1588,11 +1587,15 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
         * Try cloning first, this is supported by more file systems, and
         * more efficient if both clone and copy are supported (e.g. NFS).
         */
-       if (file_in->f_op->clone_file_range) {
-               ret = file_in->f_op->clone_file_range(file_in, pos_in,
-                               file_out, pos_out, len);
-               if (ret == 0) {
-                       ret = len;
+       if (file_in->f_op->remap_file_range) {
+               loff_t cloned;
+
+               cloned = file_in->f_op->remap_file_range(file_in, pos_in,
+                               file_out, pos_out,
+                               min_t(loff_t, MAX_RW_COUNT, len),
+                               REMAP_FILE_CAN_SHORTEN);
+               if (cloned > 0) {
+                       ret = cloned;
                        goto done;
                }
        }
@@ -1686,11 +1689,12 @@ out2:
        return ret;
 }
 
-static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
+static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
+                            bool write)
 {
        struct inode *inode = file_inode(file);
 
-       if (unlikely(pos < 0))
+       if (unlikely(pos < 0 || len < 0))
                return -EINVAL;
 
         if (unlikely((loff_t) (pos + len) < 0))
@@ -1708,22 +1712,150 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
 
        return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
 }
+/*
+ * Ensure that we don't remap a partial EOF block in the middle of something
+ * else.  Assume that the offsets have already been checked for block
+ * alignment.
+ *
+ * For deduplication we always scale down to the previous block because we
+ * can't meaningfully compare post-EOF contents.
+ *
+ * For clone we only link a partial EOF block above the destination file's EOF.
+ *
+ * Shorten the request if possible.
+ */
+static int generic_remap_check_len(struct inode *inode_in,
+                                  struct inode *inode_out,
+                                  loff_t pos_out,
+                                  loff_t *len,
+                                  unsigned int remap_flags)
+{
+       u64 blkmask = i_blocksize(inode_in) - 1;
+       loff_t new_len = *len;
+
+       if ((*len & blkmask) == 0)
+               return 0;
+
+       if ((remap_flags & REMAP_FILE_DEDUP) ||
+           pos_out + *len < i_size_read(inode_out))
+               new_len &= ~blkmask;
+
+       if (new_len == *len)
+               return 0;
+
+       if (remap_flags & REMAP_FILE_CAN_SHORTEN) {
+               *len = new_len;
+               return 0;
+       }
+
+       return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
+}
+
+/*
+ * Read a page's worth of file data into the page cache.  Return the page
+ * locked.
+ */
+static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
+{
+       struct page *page;
+
+       page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL);
+       if (IS_ERR(page))
+               return page;
+       if (!PageUptodate(page)) {
+               put_page(page);
+               return ERR_PTR(-EIO);
+       }
+       lock_page(page);
+       return page;
+}
+
+/*
+ * Compare extents of two files to see if they are the same.
+ * Caller must have locked both inodes to prevent write races.
+ */
+static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
+                                        struct inode *dest, loff_t destoff,
+                                        loff_t len, bool *is_same)
+{
+       loff_t src_poff;
+       loff_t dest_poff;
+       void *src_addr;
+       void *dest_addr;
+       struct page *src_page;
+       struct page *dest_page;
+       loff_t cmp_len;
+       bool same;
+       int error;
+
+       error = -EINVAL;
+       same = true;
+       while (len) {
+               src_poff = srcoff & (PAGE_SIZE - 1);
+               dest_poff = destoff & (PAGE_SIZE - 1);
+               cmp_len = min(PAGE_SIZE - src_poff,
+                             PAGE_SIZE - dest_poff);
+               cmp_len = min(cmp_len, len);
+               if (cmp_len <= 0)
+                       goto out_error;
+
+               src_page = vfs_dedupe_get_page(src, srcoff);
+               if (IS_ERR(src_page)) {
+                       error = PTR_ERR(src_page);
+                       goto out_error;
+               }
+               dest_page = vfs_dedupe_get_page(dest, destoff);
+               if (IS_ERR(dest_page)) {
+                       error = PTR_ERR(dest_page);
+                       unlock_page(src_page);
+                       put_page(src_page);
+                       goto out_error;
+               }
+               src_addr = kmap_atomic(src_page);
+               dest_addr = kmap_atomic(dest_page);
+
+               flush_dcache_page(src_page);
+               flush_dcache_page(dest_page);
+
+               if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
+                       same = false;
+
+               kunmap_atomic(dest_addr);
+               kunmap_atomic(src_addr);
+               unlock_page(dest_page);
+               unlock_page(src_page);
+               put_page(dest_page);
+               put_page(src_page);
+
+               if (!same)
+                       break;
+
+               srcoff += cmp_len;
+               destoff += cmp_len;
+               len -= cmp_len;
+       }
+
+       *is_same = same;
+       return 0;
+
+out_error:
+       return error;
+}
 
 /*
  * Check that the two inodes are eligible for cloning, the ranges make
  * sense, and then flush all dirty data.  Caller must ensure that the
  * inodes have been locked against any other modifications.
  *
- * Returns: 0 for "nothing to clone", 1 for "something to clone", or
- * the usual negative error code.
+ * If there's an error, then the usual negative error code is returned.
+ * Otherwise returns 0 with *len set to the request length.
  */
-int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
-                              struct inode *inode_out, loff_t pos_out,
-                              u64 *len, bool is_dedupe)
+int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
+                                 struct file *file_out, loff_t pos_out,
+                                 loff_t *len, unsigned int remap_flags)
 {
-       loff_t bs = inode_out->i_sb->s_blocksize;
-       loff_t blen;
-       loff_t isize;
+       struct inode *inode_in = file_inode(file_in);
+       struct inode *inode_out = file_inode(file_out);
        bool same_inode = (inode_in == inode_out);
        int ret;
 
@@ -1740,50 +1872,24 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
        if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
                return -EINVAL;
 
-       /* Are we going all the way to the end? */
-       isize = i_size_read(inode_in);
-       if (isize == 0)
-               return 0;
-
        /* Zero length dedupe exits immediately; reflink goes to EOF. */
        if (*len == 0) {
-               if (is_dedupe || pos_in == isize)
+               loff_t isize = i_size_read(inode_in);
+
+               if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize)
                        return 0;
                if (pos_in > isize)
                        return -EINVAL;
                *len = isize - pos_in;
+               if (*len == 0)
+                       return 0;
        }
 
-       /* Ensure offsets don't wrap and the input is inside i_size */
-       if (pos_in + *len < pos_in || pos_out + *len < pos_out ||
-           pos_in + *len > isize)
-               return -EINVAL;
-
-       /* Don't allow dedupe past EOF in the dest file */
-       if (is_dedupe) {
-               loff_t  disize;
-
-               disize = i_size_read(inode_out);
-               if (pos_out >= disize || pos_out + *len > disize)
-                       return -EINVAL;
-       }
-
-       /* If we're linking to EOF, continue to the block boundary. */
-       if (pos_in + *len == isize)
-               blen = ALIGN(isize, bs) - pos_in;
-       else
-               blen = *len;
-
-       /* Only reflink if we're aligned to block boundaries */
-       if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
-           !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
-               return -EINVAL;
-
-       /* Don't allow overlapped reflink within the same file */
-       if (same_inode) {
-               if (pos_out + blen > pos_in && pos_out < pos_in + blen)
-                       return -EINVAL;
-       }
+       /* Check that we don't violate system file offset limits. */
+       ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
+                       remap_flags);
+       if (ret)
+               return ret;
 
        /* Wait for the completion of any pending IOs on both files */
        inode_dio_wait(inode_in);
@@ -1803,7 +1909,7 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
        /*
         * Check that the extents are the same.
         */
-       if (is_dedupe) {
+       if (remap_flags & REMAP_FILE_DEDUP) {
                bool            is_same = false;
 
                ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
@@ -1814,16 +1920,43 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
                        return -EBADE;
        }
 
-       return 1;
+       ret = generic_remap_check_len(inode_in, inode_out, pos_out, len,
+                       remap_flags);
+       if (ret)
+               return ret;
+
+       /* If can't alter the file contents, we're done. */
+       if (!(remap_flags & REMAP_FILE_DEDUP)) {
+               /* Update the timestamps, since we can alter file contents. */
+               if (!(file_out->f_mode & FMODE_NOCMTIME)) {
+                       ret = file_update_time(file_out);
+                       if (ret)
+                               return ret;
+               }
+
+               /*
+                * Clear the security bits if the process is not being run by
+                * root.  This keeps people from modifying setuid and setgid
+                * binaries.
+                */
+               ret = file_remove_privs(file_out);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
 }
-EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
+EXPORT_SYMBOL(generic_remap_file_range_prep);
 
-int do_clone_file_range(struct file *file_in, loff_t pos_in,
-                       struct file *file_out, loff_t pos_out, u64 len)
+loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
+                          struct file *file_out, loff_t pos_out,
+                          loff_t len, unsigned int remap_flags)
 {
        struct inode *inode_in = file_inode(file_in);
        struct inode *inode_out = file_inode(file_out);
-       int ret;
+       loff_t ret;
+
+       WARN_ON_ONCE(remap_flags);
 
        if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
                return -EISDIR;
@@ -1843,155 +1976,76 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in,
            (file_out->f_flags & O_APPEND))
                return -EBADF;
 
-       if (!file_in->f_op->clone_file_range)
+       if (!file_in->f_op->remap_file_range)
                return -EOPNOTSUPP;
 
-       ret = clone_verify_area(file_in, pos_in, len, false);
+       ret = remap_verify_area(file_in, pos_in, len, false);
        if (ret)
                return ret;
 
-       ret = clone_verify_area(file_out, pos_out, len, true);
+       ret = remap_verify_area(file_out, pos_out, len, true);
        if (ret)
                return ret;
 
-       if (pos_in + len > i_size_read(inode_in))
-               return -EINVAL;
-
-       ret = file_in->f_op->clone_file_range(file_in, pos_in,
-                       file_out, pos_out, len);
-       if (!ret) {
-               fsnotify_access(file_in);
-               fsnotify_modify(file_out);
-       }
+       ret = file_in->f_op->remap_file_range(file_in, pos_in,
+                       file_out, pos_out, len, remap_flags);
+       if (ret < 0)
+               return ret;
 
+       fsnotify_access(file_in);
+       fsnotify_modify(file_out);
        return ret;
 }
 EXPORT_SYMBOL(do_clone_file_range);
 
-int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
-                        struct file *file_out, loff_t pos_out, u64 len)
+loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+                           struct file *file_out, loff_t pos_out,
+                           loff_t len, unsigned int remap_flags)
 {
-       int ret;
+       loff_t ret;
 
        file_start_write(file_out);
-       ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len);
+       ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len,
+                                 remap_flags);
        file_end_write(file_out);
 
        return ret;
 }
 EXPORT_SYMBOL(vfs_clone_file_range);
 
-/*
- * Read a page's worth of file data into the page cache.  Return the page
- * locked.
- */
-static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
+/* Check whether we are allowed to dedupe the destination file */
+static bool allow_file_dedupe(struct file *file)
 {
-       struct address_space *mapping;
-       struct page *page;
-       pgoff_t n;
-
-       n = offset >> PAGE_SHIFT;
-       mapping = inode->i_mapping;
-       page = read_mapping_page(mapping, n, NULL);
-       if (IS_ERR(page))
-               return page;
-       if (!PageUptodate(page)) {
-               put_page(page);
-               return ERR_PTR(-EIO);
-       }
-       lock_page(page);
-       return page;
+       if (capable(CAP_SYS_ADMIN))
+               return true;
+       if (file->f_mode & FMODE_WRITE)
+               return true;
+       if (uid_eq(current_fsuid(), file_inode(file)->i_uid))
+               return true;
+       if (!inode_permission(file_inode(file), MAY_WRITE))
+               return true;
+       return false;
 }
 
-/*
- * Compare extents of two files to see if they are the same.
- * Caller must have locked both inodes to prevent write races.
- */
-int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
-                                 struct inode *dest, loff_t destoff,
-                                 loff_t len, bool *is_same)
+loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
+                                struct file *dst_file, loff_t dst_pos,
+                                loff_t len, unsigned int remap_flags)
 {
-       loff_t src_poff;
-       loff_t dest_poff;
-       void *src_addr;
-       void *dest_addr;
-       struct page *src_page;
-       struct page *dest_page;
-       loff_t cmp_len;
-       bool same;
-       int error;
-
-       error = -EINVAL;
-       same = true;
-       while (len) {
-               src_poff = srcoff & (PAGE_SIZE - 1);
-               dest_poff = destoff & (PAGE_SIZE - 1);
-               cmp_len = min(PAGE_SIZE - src_poff,
-                             PAGE_SIZE - dest_poff);
-               cmp_len = min(cmp_len, len);
-               if (cmp_len <= 0)
-                       goto out_error;
-
-               src_page = vfs_dedupe_get_page(src, srcoff);
-               if (IS_ERR(src_page)) {
-                       error = PTR_ERR(src_page);
-                       goto out_error;
-               }
-               dest_page = vfs_dedupe_get_page(dest, destoff);
-               if (IS_ERR(dest_page)) {
-                       error = PTR_ERR(dest_page);
-                       unlock_page(src_page);
-                       put_page(src_page);
-                       goto out_error;
-               }
-               src_addr = kmap_atomic(src_page);
-               dest_addr = kmap_atomic(dest_page);
+       loff_t ret;
 
-               flush_dcache_page(src_page);
-               flush_dcache_page(dest_page);
-
-               if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
-                       same = false;
-
-               kunmap_atomic(dest_addr);
-               kunmap_atomic(src_addr);
-               unlock_page(dest_page);
-               unlock_page(src_page);
-               put_page(dest_page);
-               put_page(src_page);
-
-               if (!same)
-                       break;
-
-               srcoff += cmp_len;
-               destoff += cmp_len;
-               len -= cmp_len;
-       }
-
-       *is_same = same;
-       return 0;
-
-out_error:
-       return error;
-}
-EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
-
-int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
-                             struct file *dst_file, loff_t dst_pos, u64 len)
-{
-       s64 ret;
+       WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
+                                    REMAP_FILE_CAN_SHORTEN));
 
        ret = mnt_want_write_file(dst_file);
        if (ret)
                return ret;
 
-       ret = clone_verify_area(dst_file, dst_pos, len, true);
+       ret = remap_verify_area(dst_file, dst_pos, len, true);
        if (ret < 0)
                goto out_drop_write;
 
-       ret = -EINVAL;
-       if (!(capable(CAP_SYS_ADMIN) || (dst_file->f_mode & FMODE_WRITE)))
+       ret = -EPERM;
+       if (!allow_file_dedupe(dst_file))
                goto out_drop_write;
 
        ret = -EXDEV;
@@ -2003,11 +2057,16 @@ int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
                goto out_drop_write;
 
        ret = -EINVAL;
-       if (!dst_file->f_op->dedupe_file_range)
+       if (!dst_file->f_op->remap_file_range)
                goto out_drop_write;
 
-       ret = dst_file->f_op->dedupe_file_range(src_file, src_pos,
-                                               dst_file, dst_pos, len);
+       if (len == 0) {
+               ret = 0;
+               goto out_drop_write;
+       }
+
+       ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file,
+                       dst_pos, len, remap_flags | REMAP_FILE_DEDUP);
 out_drop_write:
        mnt_drop_write_file(dst_file);
 
@@ -2024,7 +2083,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
        int i;
        int ret;
        u16 count = same->dest_count;
-       int deduped;
+       loff_t deduped;
 
        if (!(file->f_mode & FMODE_READ))
                return -EINVAL;
@@ -2043,7 +2102,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
        if (!S_ISREG(src->i_mode))
                goto out;
 
-       ret = clone_verify_area(file, off, len, false);
+       ret = remap_verify_area(file, off, len, false);
        if (ret < 0)
                goto out;
        ret = 0;
@@ -2075,7 +2134,8 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
                }
 
                deduped = vfs_dedupe_file_range_one(file, off, dst_file,
-                                                   info->dest_offset, len);
+                                                   info->dest_offset, len,
+                                                   REMAP_FILE_CAN_SHORTEN);
                if (deduped == -EBADE)
                        info->status = FILE_DEDUPE_RANGE_DIFFERS;
                else if (deduped < 0)
index b3daa971f59771d6adf248a192db7d6e3121b015..3553f1956508daeca04aa1acaa1aaab61cdbb6e5 100644 (file)
@@ -301,7 +301,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
        struct kiocb kiocb;
        int idx, ret;
 
-       iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len);
+       iov_iter_pipe(&to, READ, pipe, len);
        idx = to.idx;
        init_sync_kiocb(&kiocb, in);
        kiocb.ki_pos = *ppos;
@@ -386,7 +386,7 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
         */
        offset = *ppos & ~PAGE_MASK;
 
-       iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset);
+       iov_iter_pipe(&to, READ, pipe, len + offset);
 
        res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base);
        if (res <= 0)
@@ -745,8 +745,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
                        left -= this_len;
                }
 
-               iov_iter_bvec(&from, ITER_BVEC | WRITE, array, n,
-                             sd.total_len - left);
+               iov_iter_bvec(&from, WRITE, array, n, sd.total_len - left);
                ret = vfs_iter_write(out, &from, &sd.pos, 0);
                if (ret <= 0)
                        break;
index bbc78549be4cc3f0536033624723666ab13ef0d0..529856fbccd0ee5f6559519a3d9db12cf932a775 100644 (file)
@@ -7,6 +7,7 @@ config UBIFS_FS
        select CRYPTO if UBIFS_FS_ZLIB
        select CRYPTO_LZO if UBIFS_FS_LZO
        select CRYPTO_DEFLATE if UBIFS_FS_ZLIB
+       select CRYPTO_HASH_INFO
        depends on MTD_UBI
        help
          UBIFS is a file system for flash devices which works on top of UBI.
@@ -85,3 +86,13 @@ config UBIFS_FS_SECURITY
          the extended attribute support in advance.
 
          If you are not using a security module, say N.
+
+config UBIFS_FS_AUTHENTICATION
+       bool "UBIFS authentication support"
+       select CRYPTO_HMAC
+       help
+         Enable authentication support for UBIFS. This feature offers protection
+         against offline changes for both data and metadata of the filesystem.
+         If you say yes here you should also select a hashing algorithm such as
+         sha256, these are not selected automatically since there are many
+         different options.
index 6197d7e539e42d872a07763e524e1aad284d7a82..5f838319c8d533858dcf42b34631275ac8683797 100644 (file)
@@ -8,3 +8,4 @@ ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o debug.o
 ubifs-y += misc.o
 ubifs-$(CONFIG_UBIFS_FS_ENCRYPTION) += crypto.o
 ubifs-$(CONFIG_UBIFS_FS_XATTR) += xattr.o
+ubifs-$(CONFIG_UBIFS_FS_AUTHENTICATION) += auth.o
diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c
new file mode 100644 (file)
index 0000000..124e965
--- /dev/null
@@ -0,0 +1,502 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2018 Pengutronix, Sascha Hauer <s.hauer@pengutronix.de>
+ */
+
+/*
+ * This file implements various helper functions for UBIFS authentication support
+ */
+
+#include <linux/crypto.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <crypto/algapi.h>
+#include <keys/user-type.h>
+
+#include "ubifs.h"
+
+/**
+ * __ubifs_node_calc_hash - calculate the hash of a UBIFS node
+ * @c: UBIFS file-system description object
+ * @node: the node to calculate a hash for
+ * @hash: the returned hash
+ *
+ * Returns 0 for success or a negative error code otherwise.
+ */
+int __ubifs_node_calc_hash(const struct ubifs_info *c, const void *node,
+                           u8 *hash)
+{
+       const struct ubifs_ch *ch = node;
+       SHASH_DESC_ON_STACK(shash, c->hash_tfm);
+       int err;
+
+       shash->tfm = c->hash_tfm;
+       shash->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+       err = crypto_shash_digest(shash, node, le32_to_cpu(ch->len), hash);
+       if (err < 0)
+               return err;
+       return 0;
+}
+
+/**
+ * ubifs_hash_calc_hmac - calculate a HMAC from a hash
+ * @c: UBIFS file-system description object
+ * @hash: the hash to calculate a HMAC for
+ * @hmac: the returned HMAC
+ *
+ * Returns 0 for success or a negative error code otherwise.
+ */
+static int ubifs_hash_calc_hmac(const struct ubifs_info *c, const u8 *hash,
+                                u8 *hmac)
+{
+       SHASH_DESC_ON_STACK(shash, c->hmac_tfm);
+       int err;
+
+       shash->tfm = c->hmac_tfm;
+       shash->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+       err = crypto_shash_digest(shash, hash, c->hash_len, hmac);
+       if (err < 0)
+               return err;
+       return 0;
+}
+
+/**
+ * ubifs_prepare_auth_node - Prepare an authentication node
+ * @c: UBIFS file-system description object
+ * @node: the authentication node to be prepared
+ * @inhash: input hash of previous nodes
+ *
+ * This function prepares an authentication node for writing onto flash.
+ * It creates a HMAC from the given input hash and writes it to the node.
+ *
+ * Returns 0 for success or a negative error code otherwise.
+ */
+int ubifs_prepare_auth_node(struct ubifs_info *c, void *node,
+                            struct shash_desc *inhash)
+{
+       SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm);
+       struct ubifs_auth_node *auth = node;
+       u8 *hash;
+       int err;
+
+       hash = kmalloc(crypto_shash_descsize(c->hash_tfm), GFP_NOFS);
+       if (!hash)
+               return -ENOMEM;
+
+       hash_desc->tfm = c->hash_tfm;
+       hash_desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+       ubifs_shash_copy_state(c, inhash, hash_desc);
+
+       err = crypto_shash_final(hash_desc, hash);
+       if (err)
+               goto out;
+
+       err = ubifs_hash_calc_hmac(c, hash, auth->hmac);
+       if (err)
+               goto out;
+
+       auth->ch.node_type = UBIFS_AUTH_NODE;
+       ubifs_prepare_node(c, auth, ubifs_auth_node_sz(c), 0);
+
+       err = 0;
+out:
+       kfree(hash);
+
+       return err;
+}
+
+static struct shash_desc *ubifs_get_desc(const struct ubifs_info *c,
+                                        struct crypto_shash *tfm)
+{
+       struct shash_desc *desc;
+       int err;
+
+       if (!ubifs_authenticated(c))
+               return NULL;
+
+       desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
+       if (!desc)
+               return ERR_PTR(-ENOMEM);
+
+       desc->tfm = tfm;
+       desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+       err = crypto_shash_init(desc);
+       if (err) {
+               kfree(desc);
+               return ERR_PTR(err);
+       }
+
+       return desc;
+}
+
+/**
+ * __ubifs_hash_get_desc - get a descriptor suitable for hashing a node
+ * @c: UBIFS file-system description object
+ *
+ * This function returns a descriptor suitable for hashing a node. Free after use
+ * with kfree.
+ */
+struct shash_desc *__ubifs_hash_get_desc(const struct ubifs_info *c)
+{
+       return ubifs_get_desc(c, c->hash_tfm);
+}
+
+/**
+ * __ubifs_shash_final - finalize shash
+ * @c: UBIFS file-system description object
+ * @desc: the descriptor
+ * @out: the output hash
+ *
+ * Simple wrapper around crypto_shash_final(), safe to be called with
+ * disabled authentication.
+ */
+int __ubifs_shash_final(const struct ubifs_info *c, struct shash_desc *desc,
+                       u8 *out)
+{
+       if (ubifs_authenticated(c))
+               return crypto_shash_final(desc, out);
+
+       return 0;
+}
+
+/**
+ * ubifs_bad_hash - Report hash mismatches
+ * @c: UBIFS file-system description object
+ * @node: the node
+ * @hash: the expected hash
+ * @lnum: the LEB @node was read from
+ * @offs: offset in LEB @node was read from
+ *
+ * This function reports a hash mismatch when a node has a different hash than
+ * expected.
+ */
+void ubifs_bad_hash(const struct ubifs_info *c, const void *node, const u8 *hash,
+                   int lnum, int offs)
+{
+       int len = min(c->hash_len, 20);
+       int cropped = len != c->hash_len;
+       const char *cont = cropped ? "..." : "";
+
+       u8 calc[UBIFS_HASH_ARR_SZ];
+
+       __ubifs_node_calc_hash(c, node, calc);
+
+       ubifs_err(c, "hash mismatch on node at LEB %d:%d", lnum, offs);
+       ubifs_err(c, "hash expected:   %*ph%s", len, hash, cont);
+       ubifs_err(c, "hash calculated: %*ph%s", len, calc, cont);
+}
+
+/**
+ * __ubifs_node_check_hash - check the hash of a node against given hash
+ * @c: UBIFS file-system description object
+ * @node: the node
+ * @expected: the expected hash
+ *
+ * This function calculates a hash over a node and compares it to the given hash.
+ * Returns 0 if both hashes are equal or authentication is disabled, otherwise a
+ * negative error code is returned.
+ */
+int __ubifs_node_check_hash(const struct ubifs_info *c, const void *node,
+                           const u8 *expected)
+{
+       u8 calc[UBIFS_HASH_ARR_SZ];
+       int err;
+
+       err = __ubifs_node_calc_hash(c, node, calc);
+       if (err)
+               return err;
+
+       if (ubifs_check_hash(c, expected, calc))
+               return -EPERM;
+
+       return 0;
+}
+
+/**
+ * ubifs_init_authentication - initialize UBIFS authentication support
+ * @c: UBIFS file-system description object
+ *
+ * This function returns 0 for success or a negative error code otherwise.
+ */
+int ubifs_init_authentication(struct ubifs_info *c)
+{
+       struct key *keyring_key;
+       const struct user_key_payload *ukp;
+       int err;
+       char hmac_name[CRYPTO_MAX_ALG_NAME];
+
+       if (!c->auth_hash_name) {
+               ubifs_err(c, "authentication hash name needed with authentication");
+               return -EINVAL;
+       }
+
+       c->auth_hash_algo = match_string(hash_algo_name, HASH_ALGO__LAST,
+                                        c->auth_hash_name);
+       if ((int)c->auth_hash_algo < 0) {
+               ubifs_err(c, "Unknown hash algo %s specified",
+                         c->auth_hash_name);
+               return -EINVAL;
+       }
+
+       snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
+                c->auth_hash_name);
+
+       keyring_key = request_key(&key_type_logon, c->auth_key_name, NULL);
+
+       if (IS_ERR(keyring_key)) {
+               ubifs_err(c, "Failed to request key: %ld",
+                         PTR_ERR(keyring_key));
+               return PTR_ERR(keyring_key);
+       }
+
+       down_read(&keyring_key->sem);
+
+       if (keyring_key->type != &key_type_logon) {
+               ubifs_err(c, "key type must be logon");
+               err = -ENOKEY;
+               goto out;
+       }
+
+       ukp = user_key_payload_locked(keyring_key);
+       if (!ukp) {
+               /* key was revoked before we acquired its semaphore */
+               err = -EKEYREVOKED;
+               goto out;
+       }
+
+       c->hash_tfm = crypto_alloc_shash(c->auth_hash_name, 0,
+                                        CRYPTO_ALG_ASYNC);
+       if (IS_ERR(c->hash_tfm)) {
+               err = PTR_ERR(c->hash_tfm);
+               ubifs_err(c, "Can not allocate %s: %d",
+                         c->auth_hash_name, err);
+               goto out;
+       }
+
+       c->hash_len = crypto_shash_digestsize(c->hash_tfm);
+       if (c->hash_len > UBIFS_HASH_ARR_SZ) {
+               ubifs_err(c, "hash %s is bigger than maximum allowed hash size (%d > %d)",
+                         c->auth_hash_name, c->hash_len, UBIFS_HASH_ARR_SZ);
+               err = -EINVAL;
+               goto out_free_hash;
+       }
+
+       c->hmac_tfm = crypto_alloc_shash(hmac_name, 0, CRYPTO_ALG_ASYNC);
+       if (IS_ERR(c->hmac_tfm)) {
+               err = PTR_ERR(c->hmac_tfm);
+               ubifs_err(c, "Can not allocate %s: %d", hmac_name, err);
+               goto out_free_hash;
+       }
+
+       c->hmac_desc_len = crypto_shash_digestsize(c->hmac_tfm);
+       if (c->hmac_desc_len > UBIFS_HMAC_ARR_SZ) {
+               ubifs_err(c, "hmac %s is bigger than maximum allowed hmac size (%d > %d)",
+                         hmac_name, c->hmac_desc_len, UBIFS_HMAC_ARR_SZ);
+               err = -EINVAL;
+               goto out_free_hash;
+       }
+
+       err = crypto_shash_setkey(c->hmac_tfm, ukp->data, ukp->datalen);
+       if (err)
+               goto out_free_hmac;
+
+       c->authenticated = true;
+
+       c->log_hash = ubifs_hash_get_desc(c);
+       if (IS_ERR(c->log_hash)) {
+               err = PTR_ERR(c->log_hash);
+               goto out_free_hmac;
+       }
+
+out_free_hmac:
+       if (err)
+               crypto_free_shash(c->hmac_tfm);
+out_free_hash:
+       if (err)
+               crypto_free_shash(c->hash_tfm);
+out:
+       up_read(&keyring_key->sem);
+       key_put(keyring_key);
+
+       return err;
+}
+
+/**
+ * __ubifs_exit_authentication - release resource
+ * @c: UBIFS file-system description object
+ *
+ * This function releases the authentication related resources.
+ */
+void __ubifs_exit_authentication(struct ubifs_info *c)
+{
+       if (!ubifs_authenticated(c))
+               return;
+
+       crypto_free_shash(c->hmac_tfm);
+       crypto_free_shash(c->hash_tfm);
+       kfree(c->log_hash);
+}
+
+/**
+ * ubifs_node_calc_hmac - calculate the HMAC of a UBIFS node
+ * @c: UBIFS file-system description object
+ * @node: the node to calculate a HMAC for
+ * @len: the length of the node
+ * @ofs_hmac: the offset in the node where the HMAC is inserted
+ * @hmac: returned HMAC
+ *
+ * This function calculates a HMAC of a UBIFS node. The HMAC is expected to be
+ * embedded into the node, so this area is not covered by the HMAC. Also not
+ * covered is the UBIFS_NODE_MAGIC and the CRC of the node.
+ */
+static int ubifs_node_calc_hmac(const struct ubifs_info *c, const void *node,
+                               int len, int ofs_hmac, void *hmac)
+{
+       SHASH_DESC_ON_STACK(shash, c->hmac_tfm);
+       int hmac_len = c->hmac_desc_len;
+       int err;
+
+       ubifs_assert(c, ofs_hmac > 8);
+       ubifs_assert(c, ofs_hmac + hmac_len < len);
+
+       shash->tfm = c->hmac_tfm;
+       shash->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+       err = crypto_shash_init(shash);
+       if (err)
+               return err;
+
+       /* behind common node header CRC up to HMAC begin */
+       err = crypto_shash_update(shash, node + 8, ofs_hmac - 8);
+       if (err < 0)
+               return err;
+
+       /* behind HMAC, if any */
+       if (len - ofs_hmac - hmac_len > 0) {
+               err = crypto_shash_update(shash, node + ofs_hmac + hmac_len,
+                           len - ofs_hmac - hmac_len);
+               if (err < 0)
+                       return err;
+       }
+
+       return crypto_shash_final(shash, hmac);
+}
+
+/**
+ * __ubifs_node_insert_hmac - insert a HMAC into a UBIFS node
+ * @c: UBIFS file-system description object
+ * @node: the node to insert a HMAC into.
+ * @len: the length of the node
+ * @ofs_hmac: the offset in the node where the HMAC is inserted
+ *
+ * This function inserts a HMAC at offset @ofs_hmac into the node given in
+ * @node. The HMAC is written in place, directly into the node buffer.
+ *
+ * This function returns 0 for success or a negative error code otherwise.
+ */
+int __ubifs_node_insert_hmac(const struct ubifs_info *c, void *node, int len,
+                           int ofs_hmac)
+{
+       /* compute the HMAC and store it at its embedded location */
+       return ubifs_node_calc_hmac(c, node, len, ofs_hmac, node + ofs_hmac);
+}
+
+/**
+ * __ubifs_node_verify_hmac - verify the HMAC of UBIFS node
+ * @c: UBIFS file-system description object
+ * @node: the node whose HMAC is verified
+ * @len: the length of the node
+ * @ofs_hmac: the offset in the node where the HMAC is embedded
+ *
+ * This function verifies the HMAC at offset @ofs_hmac of the node given in
+ * @node. Returns 0 if successful, -EPERM if the HMAC does not match, or
+ * another negative error code otherwise.
+ */
+int __ubifs_node_verify_hmac(const struct ubifs_info *c, const void *node,
+                            int len, int ofs_hmac)
+{
+       int hmac_len = c->hmac_desc_len;
+       u8 *hmac;
+       int err;
+
+       hmac = kmalloc(hmac_len, GFP_NOFS);
+       if (!hmac)
+               return -ENOMEM;
+
+       err = ubifs_node_calc_hmac(c, node, len, ofs_hmac, hmac);
+       if (err) {
+               /* do not leak the scratch buffer on calculation failure */
+               kfree(hmac);
+               return err;
+       }
+
+       /* constant-time comparison to avoid a timing side channel */
+       err = crypto_memneq(hmac, node + ofs_hmac, hmac_len);
+
+       kfree(hmac);
+
+       if (!err)
+               return 0;
+
+       return -EPERM;
+}
+
+/**
+ * __ubifs_shash_copy_state - copy the hash state from one descriptor to another
+ * @c: UBIFS file-system description object
+ * @src: the descriptor whose internal state is exported
+ * @target: the descriptor that imports the state
+ *
+ * This function copies the partial hash state of @src into @target via a
+ * temporary export/import buffer so both descriptors can continue hashing
+ * independently from the same point. Returns 0 for success or a negative
+ * error code otherwise.
+ */
+int __ubifs_shash_copy_state(const struct ubifs_info *c, struct shash_desc *src,
+                            struct shash_desc *target)
+{
+       u8 *state;
+       int err;
+
+       /* descsize is the size of the algorithm's exportable state */
+       state = kmalloc(crypto_shash_descsize(src->tfm), GFP_NOFS);
+       if (!state)
+               return -ENOMEM;
+
+       err = crypto_shash_export(src, state);
+       if (err)
+               goto out;
+
+       err = crypto_shash_import(target, state);
+
+out:
+       kfree(state);
+
+       return err;
+}
+
+/**
+ * ubifs_hmac_wkm - Create a HMAC of the well known message
+ * @c: UBIFS file-system description object
+ * @hmac: The HMAC of the well known message
+ *
+ * This function creates a HMAC of a well known message. This is used
+ * to check if the provided key is suitable to authenticate a UBIFS
+ * image. This is only a convenience to the user to provide a better
+ * error message when the wrong key is provided.
+ *
+ * This function returns 0 for success or a negative error code otherwise.
+ */
+int ubifs_hmac_wkm(struct ubifs_info *c, u8 *hmac)
+{
+       SHASH_DESC_ON_STACK(shash, c->hmac_tfm);
+       int err;
+       const char well_known_message[] = "UBIFS";
+
+       /* nothing to compute on unauthenticated filesystems */
+       if (!ubifs_authenticated(c))
+               return 0;
+
+       shash->tfm = c->hmac_tfm;
+       shash->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+       err = crypto_shash_init(shash);
+       if (err)
+               return err;
+
+       /* hash the message without its trailing NUL terminator */
+       err = crypto_shash_update(shash, well_known_message,
+                                 sizeof(well_known_message) - 1);
+       if (err < 0)
+               return err;
+
+       err = crypto_shash_final(shash, hmac);
+       if (err)
+               return err;
+       return 0;
+}
index 564e330d05b146df6d8c848742b08f02820b14c3..c49ff50fdceb1d8f90f059099cd87d6e4334555b 100644 (file)
@@ -165,6 +165,8 @@ const char *dbg_ntype(int type)
                return "commit start node";
        case UBIFS_ORPH_NODE:
                return "orphan node";
+       case UBIFS_AUTH_NODE:
+               return "auth node";
        default:
                return "unknown node";
        }
@@ -542,6 +544,10 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
                               (unsigned long long)le64_to_cpu(orph->inos[i]));
                break;
        }
+       case UBIFS_AUTH_NODE:
+       {
+               break;
+       }
        default:
                pr_err("node type %d was not recognized\n",
                       (int)ch->node_type);
index d2680e0b4a36f38826f253d33c1b7d258b21bb2d..bf75fdc76fc357f7d8da299405fce8455a829e8d 100644 (file)
@@ -254,7 +254,8 @@ static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
                             snod->type == UBIFS_DATA_NODE ||
                             snod->type == UBIFS_DENT_NODE ||
                             snod->type == UBIFS_XENT_NODE ||
-                            snod->type == UBIFS_TRUN_NODE);
+                            snod->type == UBIFS_TRUN_NODE ||
+                            snod->type == UBIFS_AUTH_NODE);
 
                if (snod->type != UBIFS_INO_NODE  &&
                    snod->type != UBIFS_DATA_NODE &&
@@ -364,12 +365,13 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
 
        /* Write nodes to their new location. Use the first-fit strategy */
        while (1) {
-               int avail;
+               int avail, moved = 0;
                struct ubifs_scan_node *snod, *tmp;
 
                /* Move data nodes */
                list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
-                       avail = c->leb_size - wbuf->offs - wbuf->used;
+                       avail = c->leb_size - wbuf->offs - wbuf->used -
+                                       ubifs_auth_node_sz(c);
                        if  (snod->len > avail)
                                /*
                                 * Do not skip data nodes in order to optimize
@@ -377,14 +379,21 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
                                 */
                                break;
 
+                       err = ubifs_shash_update(c, c->jheads[GCHD].log_hash,
+                                                snod->node, snod->len);
+                       if (err)
+                               goto out;
+
                        err = move_node(c, sleb, snod, wbuf);
                        if (err)
                                goto out;
+                       moved = 1;
                }
 
                /* Move non-data nodes */
                list_for_each_entry_safe(snod, tmp, &nondata, list) {
-                       avail = c->leb_size - wbuf->offs - wbuf->used;
+                       avail = c->leb_size - wbuf->offs - wbuf->used -
+                                       ubifs_auth_node_sz(c);
                        if (avail < min)
                                break;
 
@@ -402,9 +411,41 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
                                continue;
                        }
 
+                       err = ubifs_shash_update(c, c->jheads[GCHD].log_hash,
+                                                snod->node, snod->len);
+                       if (err)
+                               goto out;
+
                        err = move_node(c, sleb, snod, wbuf);
                        if (err)
                                goto out;
+                       moved = 1;
+               }
+
+               if (ubifs_authenticated(c) && moved) {
+                       struct ubifs_auth_node *auth;
+
+                       auth = kmalloc(ubifs_auth_node_sz(c), GFP_NOFS);
+                       if (!auth) {
+                               err = -ENOMEM;
+                               goto out;
+                       }
+
+                       err = ubifs_prepare_auth_node(c, auth,
+                                               c->jheads[GCHD].log_hash);
+                       if (err) {
+                               kfree(auth);
+                               goto out;
+                       }
+
+                       err = ubifs_wbuf_write_nolock(wbuf, auth,
+                                                     ubifs_auth_node_sz(c));
+                       if (err) {
+                               kfree(auth);
+                               goto out;
+                       }
+
+                       ubifs_add_dirt(c, wbuf->lnum, ubifs_auth_node_sz(c));
                }
 
                if (list_empty(&sleb->nodes) && list_empty(&nondata))
index 099bec94b82079f8fbd03f0fb74aee2180d1dab3..d124117efd42dc8cea8aa25c8c455012b8fbaebd 100644 (file)
@@ -365,20 +365,8 @@ static unsigned long long next_sqnum(struct ubifs_info *c)
        return sqnum;
 }
 
-/**
- * ubifs_prepare_node - prepare node to be written to flash.
- * @c: UBIFS file-system description object
- * @node: the node to pad
- * @len: node length
- * @pad: if the buffer has to be padded
- *
- * This function prepares node at @node to be written to the media - it
- * calculates node CRC, fills the common header, and adds proper padding up to
- * the next minimum I/O unit if @pad is not zero.
- */
-void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad)
+void ubifs_init_node(struct ubifs_info *c, void *node, int len, int pad)
 {
-       uint32_t crc;
        struct ubifs_ch *ch = node;
        unsigned long long sqnum = next_sqnum(c);
 
@@ -389,8 +377,6 @@ void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad)
        ch->group_type = UBIFS_NO_NODE_GROUP;
        ch->sqnum = cpu_to_le64(sqnum);
        ch->padding[0] = ch->padding[1] = 0;
-       crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8);
-       ch->crc = cpu_to_le32(crc);
 
        if (pad) {
                len = ALIGN(len, 8);
@@ -399,6 +385,68 @@ void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad)
        }
 }
 
+void ubifs_crc_node(struct ubifs_info *c, void *node, int len)
+{
+       struct ubifs_ch *ch = node;
+       uint32_t crc;
+
+       crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8);
+       ch->crc = cpu_to_le32(crc);
+}
+
+/**
+ * ubifs_prepare_node_hmac - prepare node to be written to flash.
+ * @c: UBIFS file-system description object
+ * @node: the node to pad
+ * @len: node length
+ * @hmac_offs: offset of the HMAC in the node
+ * @pad: if the buffer has to be padded
+ *
+ * This function prepares node at @node to be written to the media - it
+ * calculates node CRC, fills the common header, and adds proper padding up to
+ * the next minimum I/O unit if @pad is not zero. if @hmac_offs is positive then
+ * a HMAC is inserted into the node at the given offset.
+ *
+ * This function returns 0 for success or a negative error code otherwise.
+ */
+int ubifs_prepare_node_hmac(struct ubifs_info *c, void *node, int len,
+                           int hmac_offs, int pad)
+{
+       int err;
+
+       ubifs_init_node(c, node, len, pad);
+
+       if (hmac_offs > 0) {
+               err = ubifs_node_insert_hmac(c, node, len, hmac_offs);
+               if (err)
+                       return err;
+       }
+
+       ubifs_crc_node(c, node, len);
+
+       return 0;
+}
+
+/**
+ * ubifs_prepare_node - prepare node to be written to flash.
+ * @c: UBIFS file-system description object
+ * @node: the node to pad
+ * @len: node length
+ * @pad: if the buffer has to be padded
+ *
+ * This function prepares node at @node to be written to the media - it
+ * calculates node CRC, fills the common header, and adds proper padding up to
+ * the next minimum I/O unit if @pad is not zero.
+ */
+void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad)
+{
+       /*
+        * Deliberately ignore return value since this function can only fail
+        * when a hmac offset is given.
+        */
+       ubifs_prepare_node_hmac(c, node, len, 0, pad);
+}
+
 /**
  * ubifs_prep_grp_node - prepare node of a group to be written to flash.
  * @c: UBIFS file-system description object
@@ -849,12 +897,13 @@ out:
 }
 
 /**
- * ubifs_write_node - write node to the media.
+ * ubifs_write_node_hmac - write node to the media.
  * @c: UBIFS file-system description object
  * @buf: the node to write
  * @len: node length
  * @lnum: logical eraseblock number
  * @offs: offset within the logical eraseblock
+ * @hmac_offs: offset of the HMAC within the node
  *
  * This function automatically fills node magic number, assigns sequence
  * number, and calculates node CRC checksum. The length of the @buf buffer has
@@ -862,8 +911,8 @@ out:
  * appends padding node and padding bytes if needed. Returns zero in case of
  * success and a negative error code in case of failure.
  */
-int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
-                    int offs)
+int ubifs_write_node_hmac(struct ubifs_info *c, void *buf, int len, int lnum,
+                         int offs, int hmac_offs)
 {
        int err, buf_len = ALIGN(len, c->min_io_size);
 
@@ -878,7 +927,10 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
        if (c->ro_error)
                return -EROFS;
 
-       ubifs_prepare_node(c, buf, len, 1);
+       err = ubifs_prepare_node_hmac(c, buf, len, hmac_offs, 1);
+       if (err)
+               return err;
+
        err = ubifs_leb_write(c, lnum, buf, offs, buf_len);
        if (err)
                ubifs_dump_node(c, buf);
@@ -886,6 +938,26 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
        return err;
 }
 
+/**
+ * ubifs_write_node - write node to the media.
+ * @c: UBIFS file-system description object
+ * @buf: the node to write
+ * @len: node length
+ * @lnum: logical eraseblock number
+ * @offs: offset within the logical eraseblock
+ *
+ * This function automatically fills node magic number, assigns sequence
+ * number, and calculates node CRC checksum. The length of the @buf buffer has
+ * to be aligned to the minimal I/O unit size. This function automatically
+ * appends padding node and padding bytes if needed. Returns zero in case of
+ * success and a negative error code in case of failure.
+ */
+int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
+                    int offs)
+{
+       return ubifs_write_node_hmac(c, buf, len, lnum, offs, -1);
+}
+
 /**
  * ubifs_read_node_wbuf - read node from the media or write-buffer.
  * @wbuf: wbuf to check for un-written data
index 802565a17733ce4b0e304df7daffda9517252bca..729dc76c83dffb850354521f96f7cdfabf860051 100644 (file)
@@ -90,6 +90,12 @@ static inline void zero_trun_node_unused(struct ubifs_trun_node *trun)
        memset(trun->padding, 0, 12);
 }
 
+static void ubifs_add_auth_dirt(struct ubifs_info *c, int lnum)
+{
+       if (ubifs_authenticated(c))
+               ubifs_add_dirt(c, lnum, ubifs_auth_node_sz(c));
+}
+
 /**
  * reserve_space - reserve space in the journal.
  * @c: UBIFS file-system description object
@@ -228,34 +234,33 @@ out_return:
        return err;
 }
 
-/**
- * write_node - write node to a journal head.
- * @c: UBIFS file-system description object
- * @jhead: journal head
- * @node: node to write
- * @len: node length
- * @lnum: LEB number written is returned here
- * @offs: offset written is returned here
- *
- * This function writes a node to reserved space of journal head @jhead.
- * Returns zero in case of success and a negative error code in case of
- * failure.
- */
-static int write_node(struct ubifs_info *c, int jhead, void *node, int len,
-                     int *lnum, int *offs)
+static int ubifs_hash_nodes(struct ubifs_info *c, void *node,
+                            int len, struct shash_desc *hash)
 {
-       struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
+       int auth_node_size = ubifs_auth_node_sz(c);
+       int err;
 
-       ubifs_assert(c, jhead != GCHD);
+       while (1) {
+               const struct ubifs_ch *ch = node;
+               int nodelen = le32_to_cpu(ch->len);
 
-       *lnum = c->jheads[jhead].wbuf.lnum;
-       *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
+               ubifs_assert(c, len >= auth_node_size);
 
-       dbg_jnl("jhead %s, LEB %d:%d, len %d",
-               dbg_jhead(jhead), *lnum, *offs, len);
-       ubifs_prepare_node(c, node, len, 0);
+               if (len == auth_node_size)
+                       break;
+
+               ubifs_assert(c, len > nodelen);
+               ubifs_assert(c, ch->magic == cpu_to_le32(UBIFS_NODE_MAGIC));
 
-       return ubifs_wbuf_write_nolock(wbuf, node, len);
+               err = ubifs_shash_update(c, hash, (void *)node, nodelen);
+               if (err)
+                       return err;
+
+               node += ALIGN(nodelen, 8);
+               len -= ALIGN(nodelen, 8);
+       }
+
+       return ubifs_prepare_auth_node(c, node, hash);
 }
 
 /**
@@ -268,9 +273,9 @@ static int write_node(struct ubifs_info *c, int jhead, void *node, int len,
  * @offs: offset written is returned here
  * @sync: non-zero if the write-buffer has to by synchronized
  *
- * This function is the same as 'write_node()' but it does not assume the
- * buffer it is writing is a node, so it does not prepare it (which means
- * initializing common header and calculating CRC).
+ * This function writes data to the reserved space of journal head @jhead.
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
  */
 static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
                      int *lnum, int *offs, int sync)
@@ -285,6 +290,12 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
        dbg_jnl("jhead %s, LEB %d:%d, len %d",
                dbg_jhead(jhead), *lnum, *offs, len);
 
+       if (ubifs_authenticated(c)) {
+               err = ubifs_hash_nodes(c, buf, len, c->jheads[jhead].log_hash);
+               if (err)
+                       return err;
+       }
+
        err = ubifs_wbuf_write_nolock(wbuf, buf, len);
        if (err)
                return err;
@@ -548,6 +559,9 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
        struct ubifs_dent_node *dent;
        struct ubifs_ino_node *ino;
        union ubifs_key dent_key, ino_key;
+       u8 hash_dent[UBIFS_HASH_ARR_SZ];
+       u8 hash_ino[UBIFS_HASH_ARR_SZ];
+       u8 hash_ino_host[UBIFS_HASH_ARR_SZ];
 
        ubifs_assert(c, mutex_is_locked(&host_ui->ui_mutex));
 
@@ -570,7 +584,10 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
 
        len = aligned_dlen + aligned_ilen + UBIFS_INO_NODE_SZ;
        /* Make sure to also account for extended attributes */
-       len += host_ui->data_len;
+       if (ubifs_authenticated(c))
+               len += ALIGN(host_ui->data_len, 8) + ubifs_auth_node_sz(c);
+       else
+               len += host_ui->data_len;
 
        dent = kzalloc(len, GFP_NOFS);
        if (!dent)
@@ -602,11 +619,21 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
 
        zero_dent_node_unused(dent);
        ubifs_prep_grp_node(c, dent, dlen, 0);
+       err = ubifs_node_calc_hash(c, dent, hash_dent);
+       if (err)
+               goto out_release;
 
        ino = (void *)dent + aligned_dlen;
        pack_inode(c, ino, inode, 0);
+       err = ubifs_node_calc_hash(c, ino, hash_ino);
+       if (err)
+               goto out_release;
+
        ino = (void *)ino + aligned_ilen;
        pack_inode(c, ino, dir, 1);
+       err = ubifs_node_calc_hash(c, ino, hash_ino_host);
+       if (err)
+               goto out_release;
 
        if (last_reference) {
                err = ubifs_add_orphan(c, inode->i_ino);
@@ -628,6 +655,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
        }
        release_head(c, BASEHD);
        kfree(dent);
+       ubifs_add_auth_dirt(c, lnum);
 
        if (deletion) {
                if (nm->hash)
@@ -638,7 +666,8 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
                        goto out_ro;
                err = ubifs_add_dirt(c, lnum, dlen);
        } else
-               err = ubifs_tnc_add_nm(c, &dent_key, lnum, dent_offs, dlen, nm);
+               err = ubifs_tnc_add_nm(c, &dent_key, lnum, dent_offs, dlen,
+                                      hash_dent, nm);
        if (err)
                goto out_ro;
 
@@ -650,14 +679,14 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
         */
        ino_key_init(c, &ino_key, inode->i_ino);
        ino_offs = dent_offs + aligned_dlen;
-       err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, ilen);
+       err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, ilen, hash_ino);
        if (err)
                goto out_ro;
 
        ino_key_init(c, &ino_key, dir->i_ino);
        ino_offs += aligned_ilen;
        err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs,
-                           UBIFS_INO_NODE_SZ + host_ui->data_len);
+                           UBIFS_INO_NODE_SZ + host_ui->data_len, hash_ino_host);
        if (err)
                goto out_ro;
 
@@ -706,10 +735,12 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
                         const union ubifs_key *key, const void *buf, int len)
 {
        struct ubifs_data_node *data;
-       int err, lnum, offs, compr_type, out_len, compr_len;
+       int err, lnum, offs, compr_type, out_len, compr_len, auth_len;
        int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1;
+       int write_len;
        struct ubifs_inode *ui = ubifs_inode(inode);
        bool encrypted = ubifs_crypt_is_encrypted(inode);
+       u8 hash[UBIFS_HASH_ARR_SZ];
 
        dbg_jnlk(key, "ino %lu, blk %u, len %d, key ",
                (unsigned long)key_inum(c, key), key_block(c, key), len);
@@ -718,7 +749,9 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
        if (encrypted)
                dlen += UBIFS_CIPHER_BLOCK_SIZE;
 
-       data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN);
+       auth_len = ubifs_auth_node_sz(c);
+
+       data = kmalloc(dlen + auth_len, GFP_NOFS | __GFP_NOWARN);
        if (!data) {
                /*
                 * Fall-back to the write reserve buffer. Note, we might be
@@ -757,20 +790,33 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
        }
 
        dlen = UBIFS_DATA_NODE_SZ + out_len;
+       if (ubifs_authenticated(c))
+               write_len = ALIGN(dlen, 8) + auth_len;
+       else
+               write_len = dlen;
+
        data->compr_type = cpu_to_le16(compr_type);
 
        /* Make reservation before allocating sequence numbers */
-       err = make_reservation(c, DATAHD, dlen);
+       err = make_reservation(c, DATAHD, write_len);
        if (err)
                goto out_free;
 
-       err = write_node(c, DATAHD, data, dlen, &lnum, &offs);
+       ubifs_prepare_node(c, data, dlen, 0);
+       err = write_head(c, DATAHD, data, write_len, &lnum, &offs, 0);
+       if (err)
+               goto out_release;
+
+       err = ubifs_node_calc_hash(c, data, hash);
        if (err)
                goto out_release;
+
        ubifs_wbuf_add_ino_nolock(&c->jheads[DATAHD].wbuf, key_inum(c, key));
        release_head(c, DATAHD);
 
-       err = ubifs_tnc_add(c, key, lnum, offs, dlen);
+       ubifs_add_auth_dirt(c, lnum);
+
+       err = ubifs_tnc_add(c, key, lnum, offs, dlen, hash);
        if (err)
                goto out_ro;
 
@@ -808,7 +854,9 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
        int err, lnum, offs;
        struct ubifs_ino_node *ino;
        struct ubifs_inode *ui = ubifs_inode(inode);
-       int sync = 0, len = UBIFS_INO_NODE_SZ, last_reference = !inode->i_nlink;
+       int sync = 0, write_len, ilen = UBIFS_INO_NODE_SZ;
+       int last_reference = !inode->i_nlink;
+       u8 hash[UBIFS_HASH_ARR_SZ];
 
        dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink);
 
@@ -817,20 +865,30 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
         * need to synchronize the write-buffer either.
         */
        if (!last_reference) {
-               len += ui->data_len;
+               ilen += ui->data_len;
                sync = IS_SYNC(inode);
        }
-       ino = kmalloc(len, GFP_NOFS);
+
+       if (ubifs_authenticated(c))
+               write_len = ALIGN(ilen, 8) + ubifs_auth_node_sz(c);
+       else
+               write_len = ilen;
+
+       ino = kmalloc(write_len, GFP_NOFS);
        if (!ino)
                return -ENOMEM;
 
        /* Make reservation before allocating sequence numbers */
-       err = make_reservation(c, BASEHD, len);
+       err = make_reservation(c, BASEHD, write_len);
        if (err)
                goto out_free;
 
        pack_inode(c, ino, inode, 1);
-       err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
+       err = ubifs_node_calc_hash(c, ino, hash);
+       if (err)
+               goto out_release;
+
+       err = write_head(c, BASEHD, ino, write_len, &lnum, &offs, sync);
        if (err)
                goto out_release;
        if (!sync)
@@ -838,17 +896,19 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
                                          inode->i_ino);
        release_head(c, BASEHD);
 
+       ubifs_add_auth_dirt(c, lnum);
+
        if (last_reference) {
                err = ubifs_tnc_remove_ino(c, inode->i_ino);
                if (err)
                        goto out_ro;
                ubifs_delete_orphan(c, inode->i_ino);
-               err = ubifs_add_dirt(c, lnum, len);
+               err = ubifs_add_dirt(c, lnum, ilen);
        } else {
                union ubifs_key key;
 
                ino_key_init(c, &key, inode->i_ino);
-               err = ubifs_tnc_add(c, &key, lnum, offs, len);
+               err = ubifs_tnc_add(c, &key, lnum, offs, ilen, hash);
        }
        if (err)
                goto out_ro;
@@ -958,6 +1018,10 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir,
        int aligned_dlen1, aligned_dlen2;
        int twoparents = (fst_dir != snd_dir);
        void *p;
+       u8 hash_dent1[UBIFS_HASH_ARR_SZ];
+       u8 hash_dent2[UBIFS_HASH_ARR_SZ];
+       u8 hash_p1[UBIFS_HASH_ARR_SZ];
+       u8 hash_p2[UBIFS_HASH_ARR_SZ];
 
        ubifs_assert(c, ubifs_inode(fst_dir)->data_len == 0);
        ubifs_assert(c, ubifs_inode(snd_dir)->data_len == 0);
@@ -973,6 +1037,8 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir,
        if (twoparents)
                len += plen;
 
+       len += ubifs_auth_node_sz(c);
+
        dent1 = kzalloc(len, GFP_NOFS);
        if (!dent1)
                return -ENOMEM;
@@ -993,6 +1059,9 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir,
        set_dent_cookie(c, dent1);
        zero_dent_node_unused(dent1);
        ubifs_prep_grp_node(c, dent1, dlen1, 0);
+       err = ubifs_node_calc_hash(c, dent1, hash_dent1);
+       if (err)
+               goto out_release;
 
        /* Make new dent for 2nd entry */
        dent2 = (void *)dent1 + aligned_dlen1;
@@ -1006,14 +1075,26 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir,
        set_dent_cookie(c, dent2);
        zero_dent_node_unused(dent2);
        ubifs_prep_grp_node(c, dent2, dlen2, 0);
+       err = ubifs_node_calc_hash(c, dent2, hash_dent2);
+       if (err)
+               goto out_release;
 
        p = (void *)dent2 + aligned_dlen2;
-       if (!twoparents)
+       if (!twoparents) {
                pack_inode(c, p, fst_dir, 1);
-       else {
+               err = ubifs_node_calc_hash(c, p, hash_p1);
+               if (err)
+                       goto out_release;
+       } else {
                pack_inode(c, p, fst_dir, 0);
+               err = ubifs_node_calc_hash(c, p, hash_p1);
+               if (err)
+                       goto out_release;
                p += ALIGN(plen, 8);
                pack_inode(c, p, snd_dir, 1);
+               err = ubifs_node_calc_hash(c, p, hash_p2);
+               if (err)
+                       goto out_release;
        }
 
        err = write_head(c, BASEHD, dent1, len, &lnum, &offs, sync);
@@ -1027,28 +1108,30 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir,
        }
        release_head(c, BASEHD);
 
+       ubifs_add_auth_dirt(c, lnum);
+
        dent_key_init(c, &key, snd_dir->i_ino, snd_nm);
-       err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen1, snd_nm);
+       err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen1, hash_dent1, snd_nm);
        if (err)
                goto out_ro;
 
        offs += aligned_dlen1;
        dent_key_init(c, &key, fst_dir->i_ino, fst_nm);
-       err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen2, fst_nm);
+       err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen2, hash_dent2, fst_nm);
        if (err)
                goto out_ro;
 
        offs += aligned_dlen2;
 
        ino_key_init(c, &key, fst_dir->i_ino);
-       err = ubifs_tnc_add(c, &key, lnum, offs, plen);
+       err = ubifs_tnc_add(c, &key, lnum, offs, plen, hash_p1);
        if (err)
                goto out_ro;
 
        if (twoparents) {
                offs += ALIGN(plen, 8);
                ino_key_init(c, &key, snd_dir->i_ino);
-               err = ubifs_tnc_add(c, &key, lnum, offs, plen);
+               err = ubifs_tnc_add(c, &key, lnum, offs, plen, hash_p2);
                if (err)
                        goto out_ro;
        }
@@ -1101,6 +1184,11 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
        int last_reference = !!(new_inode && new_inode->i_nlink == 0);
        int move = (old_dir != new_dir);
        struct ubifs_inode *uninitialized_var(new_ui);
+       u8 hash_old_dir[UBIFS_HASH_ARR_SZ];
+       u8 hash_new_dir[UBIFS_HASH_ARR_SZ];
+       u8 hash_new_inode[UBIFS_HASH_ARR_SZ];
+       u8 hash_dent1[UBIFS_HASH_ARR_SZ];
+       u8 hash_dent2[UBIFS_HASH_ARR_SZ];
 
        ubifs_assert(c, ubifs_inode(old_dir)->data_len == 0);
        ubifs_assert(c, ubifs_inode(new_dir)->data_len == 0);
@@ -1123,6 +1211,9 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
        len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8);
        if (move)
                len += plen;
+
+       len += ubifs_auth_node_sz(c);
+
        dent = kzalloc(len, GFP_NOFS);
        if (!dent)
                return -ENOMEM;
@@ -1143,6 +1234,9 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
        set_dent_cookie(c, dent);
        zero_dent_node_unused(dent);
        ubifs_prep_grp_node(c, dent, dlen1, 0);
+       err = ubifs_node_calc_hash(c, dent, hash_dent1);
+       if (err)
+               goto out_release;
 
        dent2 = (void *)dent + aligned_dlen1;
        dent2->ch.node_type = UBIFS_DENT_NODE;
@@ -1162,19 +1256,36 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
        set_dent_cookie(c, dent2);
        zero_dent_node_unused(dent2);
        ubifs_prep_grp_node(c, dent2, dlen2, 0);
+       err = ubifs_node_calc_hash(c, dent2, hash_dent2);
+       if (err)
+               goto out_release;
 
        p = (void *)dent2 + aligned_dlen2;
        if (new_inode) {
                pack_inode(c, p, new_inode, 0);
+               err = ubifs_node_calc_hash(c, p, hash_new_inode);
+               if (err)
+                       goto out_release;
+
                p += ALIGN(ilen, 8);
        }
 
-       if (!move)
+       if (!move) {
                pack_inode(c, p, old_dir, 1);
-       else {
+               err = ubifs_node_calc_hash(c, p, hash_old_dir);
+               if (err)
+                       goto out_release;
+       } else {
                pack_inode(c, p, old_dir, 0);
+               err = ubifs_node_calc_hash(c, p, hash_old_dir);
+               if (err)
+                       goto out_release;
+
                p += ALIGN(plen, 8);
                pack_inode(c, p, new_dir, 1);
+               err = ubifs_node_calc_hash(c, p, hash_new_dir);
+               if (err)
+                       goto out_release;
        }
 
        if (last_reference) {
@@ -1200,15 +1311,17 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
        }
        release_head(c, BASEHD);
 
+       ubifs_add_auth_dirt(c, lnum);
+
        dent_key_init(c, &key, new_dir->i_ino, new_nm);
-       err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen1, new_nm);
+       err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen1, hash_dent1, new_nm);
        if (err)
                goto out_ro;
 
        offs += aligned_dlen1;
        if (whiteout) {
                dent_key_init(c, &key, old_dir->i_ino, old_nm);
-               err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen2, old_nm);
+               err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen2, hash_dent2, old_nm);
                if (err)
                        goto out_ro;
 
@@ -1227,21 +1340,21 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
        offs += aligned_dlen2;
        if (new_inode) {
                ino_key_init(c, &key, new_inode->i_ino);
-               err = ubifs_tnc_add(c, &key, lnum, offs, ilen);
+               err = ubifs_tnc_add(c, &key, lnum, offs, ilen, hash_new_inode);
                if (err)
                        goto out_ro;
                offs += ALIGN(ilen, 8);
        }
 
        ino_key_init(c, &key, old_dir->i_ino);
-       err = ubifs_tnc_add(c, &key, lnum, offs, plen);
+       err = ubifs_tnc_add(c, &key, lnum, offs, plen, hash_old_dir);
        if (err)
                goto out_ro;
 
        if (move) {
                offs += ALIGN(plen, 8);
                ino_key_init(c, &key, new_dir->i_ino);
-               err = ubifs_tnc_add(c, &key, lnum, offs, plen);
+               err = ubifs_tnc_add(c, &key, lnum, offs, plen, hash_new_dir);
                if (err)
                        goto out_ro;
        }
@@ -1360,6 +1473,8 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
        struct ubifs_inode *ui = ubifs_inode(inode);
        ino_t inum = inode->i_ino;
        unsigned int blk;
+       u8 hash_ino[UBIFS_HASH_ARR_SZ];
+       u8 hash_dn[UBIFS_HASH_ARR_SZ];
 
        dbg_jnl("ino %lu, size %lld -> %lld",
                (unsigned long)inum, old_size, new_size);
@@ -1369,6 +1484,9 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
 
        sz = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ +
             UBIFS_MAX_DATA_NODE_SZ * WORST_COMPR_FACTOR;
+
+       sz += ubifs_auth_node_sz(c);
+
        ino = kmalloc(sz, GFP_NOFS);
        if (!ino)
                return -ENOMEM;
@@ -1414,16 +1532,28 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
 
        /* Must make reservation before allocating sequence numbers */
        len = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ;
-       if (dlen)
+
+       if (ubifs_authenticated(c))
+               len += ALIGN(dlen, 8) + ubifs_auth_node_sz(c);
+       else
                len += dlen;
+
        err = make_reservation(c, BASEHD, len);
        if (err)
                goto out_free;
 
        pack_inode(c, ino, inode, 0);
+       err = ubifs_node_calc_hash(c, ino, hash_ino);
+       if (err)
+               goto out_release;
+
        ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1);
-       if (dlen)
+       if (dlen) {
                ubifs_prep_grp_node(c, dn, dlen, 1);
+               err = ubifs_node_calc_hash(c, dn, hash_dn);
+               if (err)
+                       goto out_release;
+       }
 
        err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
        if (err)
@@ -1432,15 +1562,17 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
                ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, inum);
        release_head(c, BASEHD);
 
+       ubifs_add_auth_dirt(c, lnum);
+
        if (dlen) {
                sz = offs + UBIFS_INO_NODE_SZ + UBIFS_TRUN_NODE_SZ;
-               err = ubifs_tnc_add(c, &key, lnum, sz, dlen);
+               err = ubifs_tnc_add(c, &key, lnum, sz, dlen, hash_dn);
                if (err)
                        goto out_ro;
        }
 
        ino_key_init(c, &key, inum);
-       err = ubifs_tnc_add(c, &key, lnum, offs, UBIFS_INO_NODE_SZ);
+       err = ubifs_tnc_add(c, &key, lnum, offs, UBIFS_INO_NODE_SZ, hash_ino);
        if (err)
                goto out_ro;
 
@@ -1495,12 +1627,13 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
                           const struct inode *inode,
                           const struct fscrypt_name *nm)
 {
-       int err, xlen, hlen, len, lnum, xent_offs, aligned_xlen;
+       int err, xlen, hlen, len, lnum, xent_offs, aligned_xlen, write_len;
        struct ubifs_dent_node *xent;
        struct ubifs_ino_node *ino;
        union ubifs_key xent_key, key1, key2;
        int sync = IS_DIRSYNC(host);
        struct ubifs_inode *host_ui = ubifs_inode(host);
+       u8 hash[UBIFS_HASH_ARR_SZ];
 
        ubifs_assert(c, inode->i_nlink == 0);
        ubifs_assert(c, mutex_is_locked(&host_ui->ui_mutex));
@@ -1514,12 +1647,14 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
        hlen = host_ui->data_len + UBIFS_INO_NODE_SZ;
        len = aligned_xlen + UBIFS_INO_NODE_SZ + ALIGN(hlen, 8);
 
-       xent = kzalloc(len, GFP_NOFS);
+       write_len = len + ubifs_auth_node_sz(c);
+
+       xent = kzalloc(write_len, GFP_NOFS);
        if (!xent)
                return -ENOMEM;
 
        /* Make reservation before allocating sequence numbers */
-       err = make_reservation(c, BASEHD, len);
+       err = make_reservation(c, BASEHD, write_len);
        if (err) {
                kfree(xent);
                return err;
@@ -1540,11 +1675,16 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
        pack_inode(c, ino, inode, 0);
        ino = (void *)ino + UBIFS_INO_NODE_SZ;
        pack_inode(c, ino, host, 1);
+       err = ubifs_node_calc_hash(c, ino, hash);
+       if (err)
+               goto out_release;
 
-       err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync);
+       err = write_head(c, BASEHD, xent, write_len, &lnum, &xent_offs, sync);
        if (!sync && !err)
                ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, host->i_ino);
        release_head(c, BASEHD);
+
+       ubifs_add_auth_dirt(c, lnum);
        kfree(xent);
        if (err)
                goto out_ro;
@@ -1572,7 +1712,7 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
 
        /* And update TNC with the new host inode position */
        ino_key_init(c, &key1, host->i_ino);
-       err = ubifs_tnc_add(c, &key1, lnum, xent_offs + len - hlen, hlen);
+       err = ubifs_tnc_add(c, &key1, lnum, xent_offs + len - hlen, hlen, hash);
        if (err)
                goto out_ro;
 
@@ -1583,6 +1723,9 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
        mark_inode_clean(c, host_ui);
        return 0;
 
+out_release:
+       kfree(xent);
+       release_head(c, BASEHD);
 out_ro:
        ubifs_ro_mode(c, err);
        finish_reservation(c);
@@ -1610,6 +1753,8 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
        struct ubifs_ino_node *ino;
        union ubifs_key key;
        int sync = IS_DIRSYNC(host);
+       u8 hash_host[UBIFS_HASH_ARR_SZ];
+       u8 hash[UBIFS_HASH_ARR_SZ];
 
        dbg_jnl("ino %lu, ino %lu", host->i_ino, inode->i_ino);
        ubifs_assert(c, host->i_nlink > 0);
@@ -1621,6 +1766,8 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
        aligned_len1 = ALIGN(len1, 8);
        aligned_len = aligned_len1 + ALIGN(len2, 8);
 
+       aligned_len += ubifs_auth_node_sz(c);
+
        ino = kzalloc(aligned_len, GFP_NOFS);
        if (!ino)
                return -ENOMEM;
@@ -1631,7 +1778,13 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
                goto out_free;
 
        pack_inode(c, ino, host, 0);
+       err = ubifs_node_calc_hash(c, ino, hash_host);
+       if (err)
+               goto out_release;
        pack_inode(c, (void *)ino + aligned_len1, inode, 1);
+       err = ubifs_node_calc_hash(c, (void *)ino + aligned_len1, hash);
+       if (err)
+               goto out_release;
 
        err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0);
        if (!sync && !err) {
@@ -1644,13 +1797,15 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
        if (err)
                goto out_ro;
 
+       ubifs_add_auth_dirt(c, lnum);
+
        ino_key_init(c, &key, host->i_ino);
-       err = ubifs_tnc_add(c, &key, lnum, offs, len1);
+       err = ubifs_tnc_add(c, &key, lnum, offs, len1, hash_host);
        if (err)
                goto out_ro;
 
        ino_key_init(c, &key, inode->i_ino);
-       err = ubifs_tnc_add(c, &key, lnum, offs + aligned_len1, len2);
+       err = ubifs_tnc_add(c, &key, lnum, offs + aligned_len1, len2, hash);
        if (err)
                goto out_ro;
 
@@ -1662,6 +1817,8 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
        kfree(ino);
        return 0;
 
+out_release:
+       release_head(c, BASEHD);
 out_ro:
        ubifs_ro_mode(c, err);
        finish_reservation(c);
index 86b0828f54991d680bb9e2db72632165673ad3e0..15fd854149bbfafa4be60109c9e9b5bccf09a598 100644 (file)
@@ -236,6 +236,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
        bud->lnum = lnum;
        bud->start = offs;
        bud->jhead = jhead;
+       bud->log_hash = NULL;
 
        ref->ch.node_type = UBIFS_REF_NODE;
        ref->lnum = cpu_to_le32(bud->lnum);
@@ -275,6 +276,14 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
        if (err)
                goto out_unlock;
 
+       err = ubifs_shash_update(c, c->log_hash, ref, UBIFS_REF_NODE_SZ);
+       if (err)
+               goto out_unlock;
+
+       err = ubifs_shash_copy_state(c, c->log_hash, c->jheads[jhead].log_hash);
+       if (err)
+               goto out_unlock;
+
        c->lhead_offs += c->ref_node_alsz;
 
        ubifs_add_bud(c, bud);
@@ -377,6 +386,14 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
        cs->cmt_no = cpu_to_le64(c->cmt_no);
        ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0);
 
+       err = ubifs_shash_init(c, c->log_hash);
+       if (err)
+               goto out;
+
+       err = ubifs_shash_update(c, c->log_hash, cs, UBIFS_CS_NODE_SZ);
+       if (err < 0)
+               goto out;
+
        /*
         * Note, we do not lock 'c->log_mutex' because this is the commit start
         * phase and we are exclusively using the log. And we do not lock
@@ -402,6 +419,12 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
 
                ubifs_prepare_node(c, ref, UBIFS_REF_NODE_SZ, 0);
                len += UBIFS_REF_NODE_SZ;
+
+               err = ubifs_shash_update(c, c->log_hash, ref,
+                                        UBIFS_REF_NODE_SZ);
+               if (err)
+                       goto out;
+               ubifs_shash_copy_state(c, c->log_hash, c->jheads[i].log_hash);
        }
 
        ubifs_pad(c, buf + len, ALIGN(len, c->min_io_size) - len);
@@ -516,6 +539,7 @@ int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
                if (err)
                        return err;
                list_del(&bud->list);
+               kfree(bud->log_hash);
                kfree(bud);
        }
        mutex_lock(&c->log_mutex);
index 31393370e334826c182a2ea159df1caa435af158..d1d5e96350ddbd0ff549941655b0e4d8e089beef 100644 (file)
@@ -604,11 +604,12 @@ static int calc_pnode_num_from_parent(const struct ubifs_info *c,
  * @lpt_first: LEB number of first LPT LEB
  * @lpt_lebs: number of LEBs for LPT is passed and returned here
  * @big_lpt: use big LPT model is passed and returned here
+ * @hash: hash of the LPT is returned here
  *
  * This function returns %0 on success and a negative error code on failure.
  */
 int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
-                         int *lpt_lebs, int *big_lpt)
+                         int *lpt_lebs, int *big_lpt, u8 *hash)
 {
        int lnum, err = 0, node_sz, iopos, i, j, cnt, len, alen, row;
        int blnum, boffs, bsz, bcnt;
@@ -617,6 +618,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
        void *buf = NULL, *p;
        struct ubifs_lpt_lprops *ltab = NULL;
        int *lsave = NULL;
+       struct shash_desc *desc;
 
        err = calc_dflt_lpt_geom(c, main_lebs, big_lpt);
        if (err)
@@ -630,6 +632,10 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
        /* Needed by 'ubifs_pack_lsave()' */
        c->main_first = c->leb_cnt - *main_lebs;
 
+       desc = ubifs_hash_get_desc(c);
+       if (IS_ERR(desc))
+               return PTR_ERR(desc);
+
        lsave = kmalloc_array(c->lsave_cnt, sizeof(int), GFP_KERNEL);
        pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_KERNEL);
        nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_KERNEL);
@@ -677,6 +683,10 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
 
        /* Add first pnode */
        ubifs_pack_pnode(c, p, pnode);
+       err = ubifs_shash_update(c, desc, p, c->pnode_sz);
+       if (err)
+               goto out;
+
        p += c->pnode_sz;
        len = c->pnode_sz;
        pnode->num += 1;
@@ -711,6 +721,10 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
                        len = 0;
                }
                ubifs_pack_pnode(c, p, pnode);
+               err = ubifs_shash_update(c, desc, p, c->pnode_sz);
+               if (err)
+                       goto out;
+
                p += c->pnode_sz;
                len += c->pnode_sz;
                /*
@@ -830,6 +844,10 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
        if (err)
                goto out;
 
+       err = ubifs_shash_final(c, desc, hash);
+       if (err)
+               goto out;
+
        c->nhead_lnum = lnum;
        c->nhead_offs = ALIGN(len, c->min_io_size);
 
@@ -853,6 +871,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
                dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);
 out:
        c->ltab = NULL;
+       kfree(desc);
        kfree(lsave);
        vfree(ltab);
        vfree(buf);
@@ -1439,26 +1458,25 @@ struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c,
 }
 
 /**
- * ubifs_lpt_lookup - lookup LEB properties in the LPT.
+ * ubifs_pnode_lookup - lookup a pnode in the LPT.
  * @c: UBIFS file-system description object
- * @lnum: LEB number to lookup
+ * @i: pnode number (0 to (main_lebs - 1) / UBIFS_LPT_FANOUT)
  *
- * This function returns a pointer to the LEB properties on success or a
- * negative error code on failure.
+ * This function returns a pointer to the pnode on success or a negative
+ * error code on failure.
  */
-struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum)
+struct ubifs_pnode *ubifs_pnode_lookup(struct ubifs_info *c, int i)
 {
-       int err, i, h, iip, shft;
+       int err, h, iip, shft;
        struct ubifs_nnode *nnode;
-       struct ubifs_pnode *pnode;
 
        if (!c->nroot) {
                err = ubifs_read_nnode(c, NULL, 0);
                if (err)
                        return ERR_PTR(err);
        }
+       i <<= UBIFS_LPT_FANOUT_SHIFT;
        nnode = c->nroot;
-       i = lnum - c->main_first;
        shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
        for (h = 1; h < c->lpt_hght; h++) {
                iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
@@ -1468,7 +1486,24 @@ struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum)
                        return ERR_CAST(nnode);
        }
        iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
-       pnode = ubifs_get_pnode(c, nnode, iip);
+       return ubifs_get_pnode(c, nnode, iip);
+}
+
+/**
+ * ubifs_lpt_lookup - lookup LEB properties in the LPT.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number to lookup
+ *
+ * This function returns a pointer to the LEB properties on success or a
+ * negative error code on failure.
+ */
+struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum)
+{
+       int i, iip;
+       struct ubifs_pnode *pnode;
+
+       i = lnum - c->main_first;
+       pnode = ubifs_pnode_lookup(c, i >> UBIFS_LPT_FANOUT_SHIFT);
        if (IS_ERR(pnode))
                return ERR_CAST(pnode);
        iip = (i & (UBIFS_LPT_FANOUT - 1));
@@ -1619,6 +1654,131 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum)
        return &pnode->lprops[iip];
 }
 
+/**
+ * ubifs_lpt_calc_hash - Calculate hash of the LPT pnodes
+ * @c: UBIFS file-system description object
+ * @hash: the returned hash of the LPT pnodes
+ *
+ * This function iterates over the LPT pnodes and creates a hash over them.
+ * Returns 0 for success or a negative error code otherwise.
+ */
+int ubifs_lpt_calc_hash(struct ubifs_info *c, u8 *hash)
+{
+       struct ubifs_nnode *nnode, *nn;
+       struct ubifs_cnode *cnode;
+       struct shash_desc *desc;
+       int iip = 0, i;
+       int bufsiz = max_t(int, c->nnode_sz, c->pnode_sz);
+       void *buf;
+       int err;
+
+       if (!ubifs_authenticated(c))
+               return 0;
+
+       desc = ubifs_hash_get_desc(c);
+       if (IS_ERR(desc))
+               return PTR_ERR(desc);
+
+       buf = kmalloc(bufsiz, GFP_NOFS);
+       if (!buf) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       if (!c->nroot) {
+               err = ubifs_read_nnode(c, NULL, 0);
+               if (err)
+                       goto out;
+       }
+
+       cnode = (struct ubifs_cnode *)c->nroot;
+
+       while (cnode) {
+               nnode = cnode->parent;
+               nn = (struct ubifs_nnode *)cnode;
+               if (cnode->level > 1) {
+                       while (iip < UBIFS_LPT_FANOUT) {
+                               if (nn->nbranch[iip].lnum == 0) {
+                                       /* Go right */
+                                       iip++;
+                                       continue;
+                               }
+
+                               nnode = ubifs_get_nnode(c, nn, iip);
+                               if (IS_ERR(nnode)) {
+                                       err = PTR_ERR(nnode);
+                                       goto out;
+                               }
+
+                               /* Go down */
+                               iip = 0;
+                               cnode = (struct ubifs_cnode *)nnode;
+                               break;
+                       }
+                       if (iip < UBIFS_LPT_FANOUT)
+                               continue;
+               } else {
+                       struct ubifs_pnode *pnode;
+
+                       for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
+                               if (nn->nbranch[i].lnum == 0)
+                                       continue;
+                               pnode = ubifs_get_pnode(c, nn, i);
+                               if (IS_ERR(pnode)) {
+                                       err = PTR_ERR(pnode);
+                                       goto out;
+                               }
+
+                               ubifs_pack_pnode(c, buf, pnode);
+                               err = ubifs_shash_update(c, desc, buf,
+                                                        c->pnode_sz);
+                               if (err)
+                                       goto out;
+                       }
+               }
+               /* Go up and to the right */
+               iip = cnode->iip + 1;
+               cnode = (struct ubifs_cnode *)nnode;
+       }
+
+       err = ubifs_shash_final(c, desc, hash);
+out:
+       kfree(desc);
+       kfree(buf);
+
+       return err;
+}
+
+/**
+ * lpt_check_hash - check the hash of the LPT.
+ * @c: UBIFS file-system description object
+ *
+ * This function calculates a hash over all pnodes in the LPT and compares
+ * it with the hash stored in the master node. Returns %0 on success and a
+ * negative error code on failure.
+ */
+static int lpt_check_hash(struct ubifs_info *c)
+{
+       int err;
+       u8 hash[UBIFS_HASH_ARR_SZ];
+
+       if (!ubifs_authenticated(c))
+               return 0;
+
+       err = ubifs_lpt_calc_hash(c, hash);
+       if (err)
+               return err;
+
+       if (ubifs_check_hash(c, c->mst_node->hash_lpt, hash)) {
+               err = -EPERM;
+               ubifs_err(c, "Failed to authenticate LPT");
+       } else {
+               err = 0;
+       }
+
+       return err;
+}
+
 /**
  * lpt_init_rd - initialize the LPT for reading.
  * @c: UBIFS file-system description object
@@ -1660,6 +1820,10 @@ static int lpt_init_rd(struct ubifs_info *c)
        if (err)
                return err;
 
+       err = lpt_check_hash(c);
+       if (err)
+               return err;
+
        dbg_lp("space_bits %d", c->space_bits);
        dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits);
        dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits);
index 7ce30994bbbac726c30a16262cbfd4411243026e..1f88caffdf2acfae85113a064c68f0e3fbefb9a5 100644 (file)
@@ -618,38 +618,6 @@ static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c,
        return ubifs_get_pnode(c, nnode, iip);
 }
 
-/**
- * pnode_lookup - lookup a pnode in the LPT.
- * @c: UBIFS file-system description object
- * @i: pnode number (0 to (main_lebs - 1) / UBIFS_LPT_FANOUT))
- *
- * This function returns a pointer to the pnode on success or a negative
- * error code on failure.
- */
-static struct ubifs_pnode *pnode_lookup(struct ubifs_info *c, int i)
-{
-       int err, h, iip, shft;
-       struct ubifs_nnode *nnode;
-
-       if (!c->nroot) {
-               err = ubifs_read_nnode(c, NULL, 0);
-               if (err)
-                       return ERR_PTR(err);
-       }
-       i <<= UBIFS_LPT_FANOUT_SHIFT;
-       nnode = c->nroot;
-       shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
-       for (h = 1; h < c->lpt_hght; h++) {
-               iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
-               shft -= UBIFS_LPT_FANOUT_SHIFT;
-               nnode = ubifs_get_nnode(c, nnode, iip);
-               if (IS_ERR(nnode))
-                       return ERR_CAST(nnode);
-       }
-       iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
-       return ubifs_get_pnode(c, nnode, iip);
-}
-
 /**
  * add_pnode_dirt - add dirty space to LPT LEB properties.
  * @c: UBIFS file-system description object
@@ -702,7 +670,7 @@ static int make_tree_dirty(struct ubifs_info *c)
 {
        struct ubifs_pnode *pnode;
 
-       pnode = pnode_lookup(c, 0);
+       pnode = ubifs_pnode_lookup(c, 0);
        if (IS_ERR(pnode))
                return PTR_ERR(pnode);
 
@@ -956,7 +924,7 @@ static int make_pnode_dirty(struct ubifs_info *c, int node_num, int lnum,
        struct ubifs_pnode *pnode;
        struct ubifs_nbranch *branch;
 
-       pnode = pnode_lookup(c, node_num);
+       pnode = ubifs_pnode_lookup(c, node_num);
        if (IS_ERR(pnode))
                return PTR_ERR(pnode);
        branch = &pnode->parent->nbranch[pnode->iip];
@@ -1279,6 +1247,10 @@ int ubifs_lpt_start_commit(struct ubifs_info *c)
        if (err)
                goto out;
 
+       err = ubifs_lpt_calc_hash(c, c->mst_node->hash_lpt);
+       if (err)
+               goto out;
+
        /* Copy the LPT's own lprops for end commit to write */
        memcpy(c->ltab_cmt, c->ltab,
               sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
@@ -1558,7 +1530,7 @@ static int dbg_is_pnode_dirty(struct ubifs_info *c, int lnum, int offs)
                struct ubifs_nbranch *branch;
 
                cond_resched();
-               pnode = pnode_lookup(c, i);
+               pnode = ubifs_pnode_lookup(c, i);
                if (IS_ERR(pnode))
                        return PTR_ERR(pnode);
                branch = &pnode->parent->nbranch[pnode->iip];
@@ -1710,7 +1682,7 @@ int dbg_check_ltab(struct ubifs_info *c)
        for (i = 0; i < cnt; i++) {
                struct ubifs_pnode *pnode;
 
-               pnode = pnode_lookup(c, i);
+               pnode = ubifs_pnode_lookup(c, i);
                if (IS_ERR(pnode))
                        return PTR_ERR(pnode);
                cond_resched();
index 9df4a41bba523d6a58fb3369cd90225a1ad8617a..5ea51bbd14c7f29cb4501d6ea3a9f5863e8517d2 100644 (file)
 
 #include "ubifs.h"
 
+/**
+ * ubifs_compare_master_node - compare two UBIFS master nodes
+ * @c: UBIFS file-system description object
+ * @m1: the first node
+ * @m2: the second node
+ *
+ * This function compares two UBIFS master nodes. Returns 0 if they are equal
+ * and nonzero if not.
+ */
+int ubifs_compare_master_node(struct ubifs_info *c, void *m1, void *m2)
+{
+       int ret;
+       int behind;
+       int hmac_offs = offsetof(struct ubifs_mst_node, hmac);
+
+       /*
+        * Do not compare the common node header since the sequence number and
+        * hence the CRC are different.
+        */
+       ret = memcmp(m1 + UBIFS_CH_SZ, m2 + UBIFS_CH_SZ,
+                    hmac_offs - UBIFS_CH_SZ);
+       if (ret)
+               return ret;
+
+       /*
+        * Do not compare the embedded HMAC as well, which also must be
+        * different due to the different common node header.
+        */
+       behind = hmac_offs + UBIFS_MAX_HMAC_LEN;
+
+       if (UBIFS_MST_NODE_SZ > behind)
+               return memcmp(m1 + behind, m2 + behind, UBIFS_MST_NODE_SZ - behind);
+
+       return 0;
+}
+
 /**
  * scan_for_master - search the valid master node.
  * @c: UBIFS file-system description object
@@ -37,7 +73,7 @@ static int scan_for_master(struct ubifs_info *c)
 {
        struct ubifs_scan_leb *sleb;
        struct ubifs_scan_node *snod;
-       int lnum, offs = 0, nodes_cnt;
+       int lnum, offs = 0, nodes_cnt, err;
 
        lnum = UBIFS_MST_LNUM;
 
@@ -69,12 +105,23 @@ static int scan_for_master(struct ubifs_info *c)
                goto out_dump;
        if (snod->offs != offs)
                goto out;
-       if (memcmp((void *)c->mst_node + UBIFS_CH_SZ,
-                  (void *)snod->node + UBIFS_CH_SZ,
-                  UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
+       if (ubifs_compare_master_node(c, c->mst_node, snod->node))
                goto out;
+
        c->mst_offs = offs;
        ubifs_scan_destroy(sleb);
+
+       if (!ubifs_authenticated(c))
+               return 0;
+
+       err = ubifs_node_verify_hmac(c, c->mst_node,
+                                    sizeof(struct ubifs_mst_node),
+                                    offsetof(struct ubifs_mst_node, hmac));
+       if (err) {
+               ubifs_err(c, "Failed to verify master node HMAC");
+               return -EPERM;
+       }
+
        return 0;
 
 out:
@@ -305,6 +352,8 @@ int ubifs_read_master(struct ubifs_info *c)
        c->lst.total_dead  = le64_to_cpu(c->mst_node->total_dead);
        c->lst.total_dark  = le64_to_cpu(c->mst_node->total_dark);
 
+       ubifs_copy_hash(c, c->mst_node->hash_root_idx, c->zroot.hash);
+
        c->calc_idx_sz = c->bi.old_idx_sz;
 
        if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))
@@ -378,7 +427,9 @@ int ubifs_write_master(struct ubifs_info *c)
        c->mst_offs = offs;
        c->mst_node->highest_inum = cpu_to_le64(c->highest_inum);
 
-       err = ubifs_write_node(c, c->mst_node, len, lnum, offs);
+       ubifs_copy_hash(c, c->zroot.hash, c->mst_node->hash_root_idx);
+       err = ubifs_write_node_hmac(c, c->mst_node, len, lnum, offs,
+                                   offsetof(struct ubifs_mst_node, hmac));
        if (err)
                return err;
 
@@ -389,7 +440,8 @@ int ubifs_write_master(struct ubifs_info *c)
                if (err)
                        return err;
        }
-       err = ubifs_write_node(c, c->mst_node, len, lnum, offs);
+       err = ubifs_write_node_hmac(c, c->mst_node, len, lnum, offs,
+                                   offsetof(struct ubifs_mst_node, hmac));
 
        return err;
 }
index 21d35d7dd975ca32c6d638e448b2e1632db1aa35..6f87237fdbf43bddc1455df20edf91abfca60caa 100644 (file)
@@ -197,7 +197,8 @@ static inline int ubifs_return_leb(struct ubifs_info *c, int lnum)
  */
 static inline int ubifs_idx_node_sz(const struct ubifs_info *c, int child_cnt)
 {
-       return UBIFS_IDX_NODE_SZ + (UBIFS_BRANCH_SZ + c->key_len) * child_cnt;
+       return UBIFS_IDX_NODE_SZ + (UBIFS_BRANCH_SZ + c->key_len + c->hash_len)
+                                  * child_cnt;
 }
 
 /**
@@ -212,7 +213,7 @@ struct ubifs_branch *ubifs_idx_branch(const struct ubifs_info *c,
                                      int bnum)
 {
        return (struct ubifs_branch *)((void *)idx->branches +
-                                      (UBIFS_BRANCH_SZ + c->key_len) * bnum);
+                       (UBIFS_BRANCH_SZ + c->key_len + c->hash_len) * bnum);
 }
 
 /**
index 984e30e83c0b3eead78fdc61190026c78cd15d51..8526b7ec47077675fad73e1a74f48adeb250d212 100644 (file)
@@ -212,7 +212,10 @@ static int write_rcvrd_mst_node(struct ubifs_info *c,
        save_flags = mst->flags;
        mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY);
 
-       ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1);
+       err = ubifs_prepare_node_hmac(c, mst, UBIFS_MST_NODE_SZ,
+                                     offsetof(struct ubifs_mst_node, hmac), 1);
+       if (err)
+               goto out;
        err = ubifs_leb_change(c, lnum, mst, sz);
        if (err)
                goto out;
@@ -264,9 +267,7 @@ int ubifs_recover_master_node(struct ubifs_info *c)
                        offs2 = (void *)mst2 - buf2;
                        if (offs1 == offs2) {
                                /* Same offset, so must be the same */
-                               if (memcmp((void *)mst1 + UBIFS_CH_SZ,
-                                          (void *)mst2 + UBIFS_CH_SZ,
-                                          UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
+                               if (ubifs_compare_master_node(c, mst1, mst2))
                                        goto out_err;
                                mst = mst1;
                        } else if (offs2 + sz == offs1) {
@@ -1461,16 +1462,82 @@ out:
        return err;
 }
 
+/**
+ * inode_fix_size - fix inode size
+ * @c: UBIFS file-system description object
+ * @e: inode size information for recovery
+ */
+static int inode_fix_size(struct ubifs_info *c, struct size_entry *e)
+{
+       struct inode *inode;
+       struct ubifs_inode *ui;
+       int err;
+
+       if (c->ro_mount)
+               ubifs_assert(c, !e->inode);
+
+       if (e->inode) {
+               /* Remounting rw, pick up inode we stored earlier */
+               inode = e->inode;
+       } else {
+               inode = ubifs_iget(c->vfs_sb, e->inum);
+               if (IS_ERR(inode))
+                       return PTR_ERR(inode);
+
+               if (inode->i_size >= e->d_size) {
+                       /*
+                        * The original inode in the index already has a size
+                        * big enough, nothing to do
+                        */
+                       iput(inode);
+                       return 0;
+               }
+
+               dbg_rcvry("ino %lu size %lld -> %lld",
+                         (unsigned long)e->inum,
+                         inode->i_size, e->d_size);
+
+               ui = ubifs_inode(inode);
+
+               inode->i_size = e->d_size;
+               ui->ui_size = e->d_size;
+               ui->synced_i_size = e->d_size;
+
+               e->inode = inode;
+       }
+
+       /*
+        * In readonly mode just keep the inode pinned in memory until we go
+        * readwrite. In readwrite mode write the inode to the journal with the
+        * fixed size.
+        */
+       if (c->ro_mount)
+               return 0;
+
+       err = ubifs_jnl_write_inode(c, inode);
+
+       iput(inode);
+
+       if (err)
+               return err;
+
+       rb_erase(&e->rb, &c->size_tree);
+       kfree(e);
+
+       return 0;
+}
+
 /**
  * ubifs_recover_size - recover inode size.
  * @c: UBIFS file-system description object
+ * @in_place: If true, do an in-place size fixup
  *
  * This function attempts to fix inode size discrepancies identified by the
  * 'ubifs_recover_size_accum()' function.
  *
  * This functions returns %0 on success and a negative error code on failure.
  */
-int ubifs_recover_size(struct ubifs_info *c)
+int ubifs_recover_size(struct ubifs_info *c, bool in_place)
 {
        struct rb_node *this = rb_first(&c->size_tree);
 
@@ -1479,6 +1546,9 @@ int ubifs_recover_size(struct ubifs_info *c)
                int err;
 
                e = rb_entry(this, struct size_entry, rb);
+
+               this = rb_next(this);
+
                if (!e->exists) {
                        union ubifs_key key;
 
@@ -1502,40 +1572,26 @@ int ubifs_recover_size(struct ubifs_info *c)
                }
 
                if (e->exists && e->i_size < e->d_size) {
-                       if (c->ro_mount) {
-                               /* Fix the inode size and pin it in memory */
-                               struct inode *inode;
-                               struct ubifs_inode *ui;
-
-                               ubifs_assert(c, !e->inode);
-
-                               inode = ubifs_iget(c->vfs_sb, e->inum);
-                               if (IS_ERR(inode))
-                                       return PTR_ERR(inode);
-
-                               ui = ubifs_inode(inode);
-                               if (inode->i_size < e->d_size) {
-                                       dbg_rcvry("ino %lu size %lld -> %lld",
-                                                 (unsigned long)e->inum,
-                                                 inode->i_size, e->d_size);
-                                       inode->i_size = e->d_size;
-                                       ui->ui_size = e->d_size;
-                                       ui->synced_i_size = e->d_size;
-                                       e->inode = inode;
-                                       this = rb_next(this);
-                                       continue;
-                               }
-                               iput(inode);
-                       } else {
-                               /* Fix the size in place */
+                       ubifs_assert(c, !(c->ro_mount && in_place));
+
+                       /*
+                        * We found data that is outside the found inode size,
+                        * fixup the inode size
+                        */
+
+                       if (in_place) {
                                err = fix_size_in_place(c, e);
                                if (err)
                                        return err;
                                iput(e->inode);
+                       } else {
+                               err = inode_fix_size(c, e);
+                               if (err)
+                                       return err;
+                               continue;
                        }
                }
 
-               this = rb_next(this);
                rb_erase(&e->rb, &c->size_tree);
                kfree(e);
        }
index 4844538eb92626212394839e4734111866fbc085..75f961c4c0449505aaac3f085aa80b00acb0b8ab 100644 (file)
@@ -34,6 +34,8 @@
 
 #include "ubifs.h"
 #include <linux/list_sort.h>
+#include <crypto/hash.h>
+#include <crypto/algapi.h>
 
 /**
  * struct replay_entry - replay list entry.
@@ -56,6 +58,7 @@ struct replay_entry {
        int lnum;
        int offs;
        int len;
+       u8 hash[UBIFS_HASH_ARR_SZ];
        unsigned int deletion:1;
        unsigned long long sqnum;
        struct list_head list;
@@ -228,7 +231,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
                        err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
                else
                        err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
-                                              r->len, &r->nm);
+                                              r->len, r->hash, &r->nm);
        } else {
                if (r->deletion)
                        switch (key_type(c, &r->key)) {
@@ -248,7 +251,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
                        }
                else
                        err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs,
-                                           r->len);
+                                           r->len, r->hash);
                if (err)
                        return err;
 
@@ -352,9 +355,9 @@ static void destroy_replay_list(struct ubifs_info *c)
  * in case of success and a negative error code in case of failure.
  */
 static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
-                      union ubifs_key *key, unsigned long long sqnum,
-                      int deletion, int *used, loff_t old_size,
-                      loff_t new_size)
+                      const u8 *hash, union ubifs_key *key,
+                      unsigned long long sqnum, int deletion, int *used,
+                      loff_t old_size, loff_t new_size)
 {
        struct replay_entry *r;
 
@@ -372,6 +375,7 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
        r->lnum = lnum;
        r->offs = offs;
        r->len = len;
+       ubifs_copy_hash(c, hash, r->hash);
        r->deletion = !!deletion;
        r->sqnum = sqnum;
        key_copy(c, key, &r->key);
@@ -400,8 +404,9 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
  * negative error code in case of failure.
  */
 static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
-                      union ubifs_key *key, const char *name, int nlen,
-                      unsigned long long sqnum, int deletion, int *used)
+                      const u8 *hash, union ubifs_key *key,
+                      const char *name, int nlen, unsigned long long sqnum,
+                      int deletion, int *used)
 {
        struct replay_entry *r;
        char *nbuf;
@@ -425,6 +430,7 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
        r->lnum = lnum;
        r->offs = offs;
        r->len = len;
+       ubifs_copy_hash(c, hash, r->hash);
        r->deletion = !!deletion;
        r->sqnum = sqnum;
        key_copy(c, key, &r->key);
@@ -527,6 +533,105 @@ static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
        return data == 0xFFFFFFFF;
 }
 
+/**
+ * authenticate_sleb - authenticate one scan LEB
+ * @c: UBIFS file-system description object
+ * @sleb: the scan LEB to authenticate
+ * @log_hash: the log hash state covering the log up to this bud's reference
+ * @is_last: if true, this is the last LEB
+ *
+ * This function iterates over the buds of a single LEB authenticating all buds
+ * with the authentication nodes on this LEB. Authentication nodes are written
+ * after some buds and contain a HMAC covering the authentication node itself
+ * and the buds between the last authentication node and the current
+ * authentication node. It can happen that the last buds cannot be authenticated
+ * because a powercut happened when some nodes were written but not the
+ * corresponding authentication node. This function returns the number of nodes
+ * that could be authenticated or a negative error code.
+ */
+static int authenticate_sleb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
+                            struct shash_desc *log_hash, int is_last)
+{
+       int n_not_auth = 0;
+       struct ubifs_scan_node *snod;
+       int n_nodes = 0;
+       int err;
+       u8 *hash, *hmac;
+
+       if (!ubifs_authenticated(c))
+               return sleb->nodes_cnt;
+
+       hash = kmalloc(crypto_shash_descsize(c->hash_tfm), GFP_NOFS);
+       hmac = kmalloc(c->hmac_desc_len, GFP_NOFS);
+       if (!hash || !hmac) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       list_for_each_entry(snod, &sleb->nodes, list) {
+
+               n_nodes++;
+
+               if (snod->type == UBIFS_AUTH_NODE) {
+                       struct ubifs_auth_node *auth = snod->node;
+                       SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm);
+                       SHASH_DESC_ON_STACK(hmac_desc, c->hmac_tfm);
+
+                       hash_desc->tfm = c->hash_tfm;
+                       hash_desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+                       ubifs_shash_copy_state(c, log_hash, hash_desc);
+                       err = crypto_shash_final(hash_desc, hash);
+                       if (err)
+                               goto out;
+
+                       hmac_desc->tfm = c->hmac_tfm;
+                       hmac_desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+                       err = crypto_shash_digest(hmac_desc, hash, c->hash_len,
+                                                 hmac);
+                       if (err)
+                               goto out;
+
+                       err = ubifs_check_hmac(c, auth->hmac, hmac);
+                       if (err) {
+                               err = -EPERM;
+                               goto out;
+                       }
+                       n_not_auth = 0;
+               } else {
+                       err = crypto_shash_update(log_hash, snod->node,
+                                                 snod->len);
+                       if (err)
+                               goto out;
+                       n_not_auth++;
+               }
+       }
+
+       /*
+        * A powercut can happen when some nodes were written, but not yet
+        * the corresponding authentication node. This may only happen on
+        * the last bud though.
+        */
+       if (n_not_auth) {
+               if (is_last) {
+                       dbg_mnt("%d unauthenticated nodes found on LEB %d, Ignoring them",
+                               n_not_auth, sleb->lnum);
+                       err = 0;
+               } else {
+                       dbg_mnt("%d unauthenticated nodes found on non-last LEB %d",
+                               n_not_auth, sleb->lnum);
+                       err = -EPERM;
+               }
+       } else {
+               err = 0;
+       }
+out:
+       kfree(hash);
+       kfree(hmac);
+
+       return err ? err : n_nodes - n_not_auth;
+}
+
 /**
  * replay_bud - replay a bud logical eraseblock.
  * @c: UBIFS file-system description object
@@ -540,6 +645,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
 {
        int is_last = is_last_bud(c, b->bud);
        int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
+       int n_nodes, n = 0;
        struct ubifs_scan_leb *sleb;
        struct ubifs_scan_node *snod;
 
@@ -559,6 +665,15 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
        if (IS_ERR(sleb))
                return PTR_ERR(sleb);
 
+       n_nodes = authenticate_sleb(c, sleb, b->bud->log_hash, is_last);
+       if (n_nodes < 0) {
+               err = n_nodes;
+               goto out;
+       }
+
+       ubifs_shash_copy_state(c, b->bud->log_hash,
+                              c->jheads[b->bud->jhead].log_hash);
+
        /*
         * The bud does not have to start from offset zero - the beginning of
         * the 'lnum' LEB may contain previously committed data. One of the
@@ -582,6 +697,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
         */
 
        list_for_each_entry(snod, &sleb->nodes, list) {
+               u8 hash[UBIFS_HASH_ARR_SZ];
                int deletion = 0;
 
                cond_resched();
@@ -591,6 +707,8 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
                        goto out_dump;
                }
 
+               ubifs_node_calc_hash(c, snod->node, hash);
+
                if (snod->sqnum > c->max_sqnum)
                        c->max_sqnum = snod->sqnum;
 
@@ -602,7 +720,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
 
                        if (le32_to_cpu(ino->nlink) == 0)
                                deletion = 1;
-                       err = insert_node(c, lnum, snod->offs, snod->len,
+                       err = insert_node(c, lnum, snod->offs, snod->len, hash,
                                          &snod->key, snod->sqnum, deletion,
                                          &used, 0, new_size);
                        break;
@@ -614,7 +732,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
                                          key_block(c, &snod->key) *
                                          UBIFS_BLOCK_SIZE;
 
-                       err = insert_node(c, lnum, snod->offs, snod->len,
+                       err = insert_node(c, lnum, snod->offs, snod->len, hash,
                                          &snod->key, snod->sqnum, deletion,
                                          &used, 0, new_size);
                        break;
@@ -628,7 +746,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
                        if (err)
                                goto out_dump;
 
-                       err = insert_dent(c, lnum, snod->offs, snod->len,
+                       err = insert_dent(c, lnum, snod->offs, snod->len, hash,
                                          &snod->key, dent->name,
                                          le16_to_cpu(dent->nlen), snod->sqnum,
                                          !le64_to_cpu(dent->inum), &used);
@@ -654,11 +772,13 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
                         * functions which expect nodes to have keys.
                         */
                        trun_key_init(c, &key, le32_to_cpu(trun->inum));
-                       err = insert_node(c, lnum, snod->offs, snod->len,
+                       err = insert_node(c, lnum, snod->offs, snod->len, hash,
                                          &key, snod->sqnum, 1, &used,
                                          old_size, new_size);
                        break;
                }
+               case UBIFS_AUTH_NODE:
+                       break;
                default:
                        ubifs_err(c, "unexpected node type %d in bud LEB %d:%d",
                                  snod->type, lnum, snod->offs);
@@ -667,6 +787,10 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
                }
                if (err)
                        goto out;
+
+               n++;
+               if (n == n_nodes)
+                       break;
        }
 
        ubifs_assert(c, ubifs_search_bud(c, lnum));
@@ -745,6 +869,7 @@ static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
 {
        struct ubifs_bud *bud;
        struct bud_entry *b;
+       int err;
 
        dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead);
 
@@ -754,13 +879,21 @@ static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
 
        b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL);
        if (!b) {
-               kfree(bud);
-               return -ENOMEM;
+               err = -ENOMEM;
+               goto out;
        }
 
        bud->lnum = lnum;
        bud->start = offs;
        bud->jhead = jhead;
+       bud->log_hash = ubifs_hash_get_desc(c);
+       if (IS_ERR(bud->log_hash)) {
+               err = PTR_ERR(bud->log_hash);
+               goto out;
+       }
+
+       ubifs_shash_copy_state(c, c->log_hash, bud->log_hash);
+
        ubifs_add_bud(c, bud);
 
        b->bud = bud;
@@ -768,6 +901,11 @@ static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
        list_add_tail(&b->list, &c->replay_buds);
 
        return 0;
+out:
+       kfree(bud);
+       kfree(b);
+
+       return err;
 }
 
 /**
@@ -873,6 +1011,14 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
 
                c->cs_sqnum = le64_to_cpu(node->ch.sqnum);
                dbg_mnt("commit start sqnum %llu", c->cs_sqnum);
+
+               err = ubifs_shash_init(c, c->log_hash);
+               if (err)
+                       goto out;
+
+               err = ubifs_shash_update(c, c->log_hash, node, UBIFS_CS_NODE_SZ);
+               if (err < 0)
+                       goto out;
        }
 
        if (snod->sqnum < c->cs_sqnum) {
@@ -920,6 +1066,11 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
                        if (err)
                                goto out_dump;
 
+                       err = ubifs_shash_update(c, c->log_hash, ref,
+                                                UBIFS_REF_NODE_SZ);
+                       if (err)
+                               goto out;
+
                        err = add_replay_bud(c, le32_to_cpu(ref->lnum),
                                             le32_to_cpu(ref->offs),
                                             le32_to_cpu(ref->jhead),
index bf17f58908ff95bc63efbe48dd5ede204a28ac78..75a69dd26d6eafa609a01a2f9969c22c37512346 100644 (file)
@@ -82,10 +82,13 @@ static int create_default_filesystem(struct ubifs_info *c)
        int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first;
        int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0;
        int min_leb_cnt = UBIFS_MIN_LEB_CNT;
+       int idx_node_size;
        long long tmp64, main_bytes;
        __le64 tmp_le64;
        __le32 tmp_le32;
        struct timespec64 ts;
+       u8 hash[UBIFS_HASH_ARR_SZ];
+       u8 hash_lpt[UBIFS_HASH_ARR_SZ];
 
        /* Some functions called from here depend on the @c->key_len filed */
        c->key_len = UBIFS_SK_LEN;
@@ -147,7 +150,7 @@ static int create_default_filesystem(struct ubifs_info *c)
        c->lsave_cnt = DEFAULT_LSAVE_CNT;
        c->max_leb_cnt = c->leb_cnt;
        err = ubifs_create_dflt_lpt(c, &main_lebs, lpt_first, &lpt_lebs,
-                                   &big_lpt);
+                                   &big_lpt, hash_lpt);
        if (err)
                return err;
 
@@ -156,17 +159,35 @@ static int create_default_filesystem(struct ubifs_info *c)
 
        main_first = c->leb_cnt - main_lebs;
 
+       sup = kzalloc(ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size), GFP_KERNEL);
+       mst = kzalloc(c->mst_node_alsz, GFP_KERNEL);
+       idx_node_size = ubifs_idx_node_sz(c, 1);
+       idx = kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL);
+       ino = kzalloc(ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size), GFP_KERNEL);
+       cs = kzalloc(ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size), GFP_KERNEL);
+
+       if (!sup || !mst || !idx || !ino || !cs) {
+               err = -ENOMEM;
+               goto out;
+       }
+
        /* Create default superblock */
-       tmp = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size);
-       sup = kzalloc(tmp, GFP_KERNEL);
-       if (!sup)
-               return -ENOMEM;
 
        tmp64 = (long long)max_buds * c->leb_size;
        if (big_lpt)
                sup_flags |= UBIFS_FLG_BIGLPT;
        sup_flags |= UBIFS_FLG_DOUBLE_HASH;
 
+       if (ubifs_authenticated(c)) {
+               sup_flags |= UBIFS_FLG_AUTHENTICATION;
+               sup->hash_algo = cpu_to_le16(c->auth_hash_algo);
+               err = ubifs_hmac_wkm(c, sup->hmac_wkm);
+               if (err)
+                       goto out;
+       } else {
+               sup->hash_algo = 0xffff;
+       }
+
        sup->ch.node_type  = UBIFS_SB_NODE;
        sup->key_hash      = UBIFS_KEY_HASH_R5;
        sup->flags         = cpu_to_le32(sup_flags);
@@ -197,17 +218,9 @@ static int create_default_filesystem(struct ubifs_info *c)
        sup->rp_size = cpu_to_le64(tmp64);
        sup->ro_compat_version = cpu_to_le32(UBIFS_RO_COMPAT_VERSION);
 
-       err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0);
-       kfree(sup);
-       if (err)
-               return err;
-
        dbg_gen("default superblock created at LEB 0:0");
 
        /* Create default master node */
-       mst = kzalloc(c->mst_node_alsz, GFP_KERNEL);
-       if (!mst)
-               return -ENOMEM;
 
        mst->ch.node_type = UBIFS_MST_NODE;
        mst->log_lnum     = cpu_to_le32(UBIFS_LOG_LNUM);
@@ -233,6 +246,7 @@ static int create_default_filesystem(struct ubifs_info *c)
        mst->empty_lebs   = cpu_to_le32(main_lebs - 2);
        mst->idx_lebs     = cpu_to_le32(1);
        mst->leb_cnt      = cpu_to_le32(c->leb_cnt);
+       ubifs_copy_hash(c, hash_lpt, mst->hash_lpt);
 
        /* Calculate lprops statistics */
        tmp64 = main_bytes;
@@ -253,24 +267,9 @@ static int create_default_filesystem(struct ubifs_info *c)
 
        mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ);
 
-       err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0);
-       if (err) {
-               kfree(mst);
-               return err;
-       }
-       err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1,
-                              0);
-       kfree(mst);
-       if (err)
-               return err;
-
        dbg_gen("default master node created at LEB %d:0", UBIFS_MST_LNUM);
 
        /* Create the root indexing node */
-       tmp = ubifs_idx_node_sz(c, 1);
-       idx = kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL);
-       if (!idx)
-               return -ENOMEM;
 
        c->key_fmt = UBIFS_SIMPLE_KEY_FMT;
        c->key_hash = key_r5_hash;
@@ -282,19 +281,11 @@ static int create_default_filesystem(struct ubifs_info *c)
        key_write_idx(c, &key, &br->key);
        br->lnum = cpu_to_le32(main_first + DEFAULT_DATA_LEB);
        br->len  = cpu_to_le32(UBIFS_INO_NODE_SZ);
-       err = ubifs_write_node(c, idx, tmp, main_first + DEFAULT_IDX_LEB, 0);
-       kfree(idx);
-       if (err)
-               return err;
 
        dbg_gen("default root indexing node created LEB %d:0",
                main_first + DEFAULT_IDX_LEB);
 
        /* Create default root inode */
-       tmp = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size);
-       ino = kzalloc(tmp, GFP_KERNEL);
-       if (!ino)
-               return -ENOMEM;
 
        ino_key_init_flash(c, &ino->key, UBIFS_ROOT_INO);
        ino->ch.node_type = UBIFS_INO_NODE;
@@ -317,12 +308,6 @@ static int create_default_filesystem(struct ubifs_info *c)
        /* Set compression enabled by default */
        ino->flags = cpu_to_le32(UBIFS_COMPR_FL);
 
-       err = ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ,
-                              main_first + DEFAULT_DATA_LEB, 0);
-       kfree(ino);
-       if (err)
-               return err;
-
        dbg_gen("root inode created at LEB %d:0",
                main_first + DEFAULT_DATA_LEB);
 
@@ -331,19 +316,54 @@ static int create_default_filesystem(struct ubifs_info *c)
         * always the case during normal file-system operation. Write a fake
         * commit start node to the log.
         */
-       tmp = ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size);
-       cs = kzalloc(tmp, GFP_KERNEL);
-       if (!cs)
-               return -ENOMEM;
 
        cs->ch.node_type = UBIFS_CS_NODE;
+
+       err = ubifs_write_node_hmac(c, sup, UBIFS_SB_NODE_SZ, 0, 0,
+                                   offsetof(struct ubifs_sb_node, hmac));
+       if (err)
+               goto out;
+
+       err = ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ,
+                              main_first + DEFAULT_DATA_LEB, 0);
+       if (err)
+               goto out;
+
+       ubifs_node_calc_hash(c, ino, hash);
+       ubifs_copy_hash(c, hash, ubifs_branch_hash(c, br));
+
+       err = ubifs_write_node(c, idx, idx_node_size, main_first + DEFAULT_IDX_LEB, 0);
+       if (err)
+               goto out;
+
+       ubifs_node_calc_hash(c, idx, hash);
+       ubifs_copy_hash(c, hash, mst->hash_root_idx);
+
+       err = ubifs_write_node_hmac(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0,
+               offsetof(struct ubifs_mst_node, hmac));
+       if (err)
+               goto out;
+
+       err = ubifs_write_node_hmac(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1,
+                              0, offsetof(struct ubifs_mst_node, hmac));
+       if (err)
+               goto out;
+
        err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM, 0);
-       kfree(cs);
        if (err)
-               return err;
+               goto out;
 
        ubifs_msg(c, "default file-system created");
-       return 0;
+
+       err = 0;
+out:
+       kfree(sup);
+       kfree(mst);
+       kfree(idx);
+       kfree(ino);
+       kfree(cs);
+
+       return err;
 }
 
 /**
@@ -498,7 +518,7 @@ failed:
  * code. Note, the user of this function is responsible of kfree()'ing the
  * returned superblock buffer.
  */
-struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
+static struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
 {
        struct ubifs_sb_node *sup;
        int err;
@@ -517,6 +537,65 @@ struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
        return sup;
 }
 
+static int authenticate_sb_node(struct ubifs_info *c,
+                               const struct ubifs_sb_node *sup)
+{
+       unsigned int sup_flags = le32_to_cpu(sup->flags);
+       u8 hmac_wkm[UBIFS_HMAC_ARR_SZ];
+       int authenticated = !!(sup_flags & UBIFS_FLG_AUTHENTICATION);
+       int hash_algo;
+       int err;
+
+       if (c->authenticated && !authenticated) {
+               ubifs_err(c, "authenticated FS forced, but found FS without authentication");
+               return -EINVAL;
+       }
+
+       if (!c->authenticated && authenticated) {
+               ubifs_err(c, "authenticated FS found, but no key given");
+               return -EINVAL;
+       }
+
+       ubifs_msg(c, "Mounting in %sauthenticated mode",
+                 c->authenticated ? "" : "un");
+
+       if (!c->authenticated)
+               return 0;
+
+       if (!IS_ENABLED(CONFIG_UBIFS_FS_AUTHENTICATION))
+               return -EOPNOTSUPP;
+
+       hash_algo = le16_to_cpu(sup->hash_algo);
+       if (hash_algo >= HASH_ALGO__LAST) {
+               ubifs_err(c, "superblock uses unknown hash algo %d",
+                         hash_algo);
+               return -EINVAL;
+       }
+
+       if (strcmp(hash_algo_name[hash_algo], c->auth_hash_name)) {
+               ubifs_err(c, "This filesystem uses %s for hashing,"
+                            " but %s is specified", hash_algo_name[hash_algo],
+                            c->auth_hash_name);
+               return -EINVAL;
+       }
+
+       err = ubifs_hmac_wkm(c, hmac_wkm);
+       if (err)
+               return err;
+
+       if (ubifs_check_hmac(c, hmac_wkm, sup->hmac_wkm)) {
+               ubifs_err(c, "provided key does not fit");
+               return -ENOKEY;
+       }
+
+       err = ubifs_node_verify_hmac(c, sup, sizeof(*sup),
+                                    offsetof(struct ubifs_sb_node, hmac));
+       if (err)
+               ubifs_err(c, "Failed to authenticate superblock: %d", err);
+
+       return err;
+}
+
 /**
  * ubifs_write_sb_node - write superblock node.
  * @c: UBIFS file-system description object
@@ -527,8 +606,13 @@ struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
 int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup)
 {
        int len = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size);
+       int err;
+
+       err = ubifs_prepare_node_hmac(c, sup, UBIFS_SB_NODE_SZ,
+                                     offsetof(struct ubifs_sb_node, hmac), 1);
+       if (err)
+               return err;
 
-       ubifs_prepare_node(c, sup, UBIFS_SB_NODE_SZ, 1);
        return ubifs_leb_change(c, UBIFS_SB_LNUM, sup, len);
 }
 
@@ -555,6 +639,8 @@ int ubifs_read_superblock(struct ubifs_info *c)
        if (IS_ERR(sup))
                return PTR_ERR(sup);
 
+       c->sup_node = sup;
+
        c->fmt_version = le32_to_cpu(sup->fmt_version);
        c->ro_compat_version = le32_to_cpu(sup->ro_compat_version);
 
@@ -603,7 +689,7 @@ int ubifs_read_superblock(struct ubifs_info *c)
                c->key_hash = key_test_hash;
                c->key_hash_type = UBIFS_KEY_HASH_TEST;
                break;
-       };
+       }
 
        c->key_fmt = sup->key_fmt;
 
@@ -640,6 +726,10 @@ int ubifs_read_superblock(struct ubifs_info *c)
        c->double_hash = !!(sup_flags & UBIFS_FLG_DOUBLE_HASH);
        c->encrypted = !!(sup_flags & UBIFS_FLG_ENCRYPTION);
 
+       err = authenticate_sb_node(c, sup);
+       if (err)
+               goto out;
+
        if ((sup_flags & ~UBIFS_FLG_MASK) != 0) {
                ubifs_err(c, "Unknown feature flags found: %#x",
                          sup_flags & ~UBIFS_FLG_MASK);
@@ -686,7 +776,6 @@ int ubifs_read_superblock(struct ubifs_info *c)
 
        err = validate_sb(c, sup);
 out:
-       kfree(sup);
        return err;
 }
 
@@ -815,7 +904,7 @@ out:
 int ubifs_fixup_free_space(struct ubifs_info *c)
 {
        int err;
-       struct ubifs_sb_node *sup;
+       struct ubifs_sb_node *sup = c->sup_node;
 
        ubifs_assert(c, c->space_fixup);
        ubifs_assert(c, !c->ro_mount);
@@ -826,16 +915,11 @@ int ubifs_fixup_free_space(struct ubifs_info *c)
        if (err)
                return err;
 
-       sup = ubifs_read_sb_node(c);
-       if (IS_ERR(sup))
-               return PTR_ERR(sup);
-
        /* Free-space fixup is no longer required */
        c->space_fixup = 0;
        sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP);
 
        err = ubifs_write_sb_node(c, sup);
-       kfree(sup);
        if (err)
                return err;
 
@@ -846,7 +930,7 @@ int ubifs_fixup_free_space(struct ubifs_info *c)
 int ubifs_enable_encryption(struct ubifs_info *c)
 {
        int err;
-       struct ubifs_sb_node *sup;
+       struct ubifs_sb_node *sup = c->sup_node;
 
        if (c->encrypted)
                return 0;
@@ -859,16 +943,11 @@ int ubifs_enable_encryption(struct ubifs_info *c)
                return -EINVAL;
        }
 
-       sup = ubifs_read_sb_node(c);
-       if (IS_ERR(sup))
-               return PTR_ERR(sup);
-
        sup->flags |= cpu_to_le32(UBIFS_FLG_ENCRYPTION);
 
        err = ubifs_write_sb_node(c, sup);
        if (!err)
                c->encrypted = 1;
-       kfree(sup);
 
        return err;
 }
index fec62e9dfbe6a6c639d7f61879bf21ac84ef4a6c..1fac1133dadd291b7491527802e70782ab8544e8 100644 (file)
@@ -579,6 +579,9 @@ static int init_constants_early(struct ubifs_info *c)
        c->ranges[UBIFS_REF_NODE].len  = UBIFS_REF_NODE_SZ;
        c->ranges[UBIFS_TRUN_NODE].len = UBIFS_TRUN_NODE_SZ;
        c->ranges[UBIFS_CS_NODE].len   = UBIFS_CS_NODE_SZ;
+       c->ranges[UBIFS_AUTH_NODE].min_len = UBIFS_AUTH_NODE_SZ;
+       c->ranges[UBIFS_AUTH_NODE].max_len = UBIFS_AUTH_NODE_SZ +
+                               UBIFS_MAX_HMAC_LEN;
 
        c->ranges[UBIFS_INO_NODE].min_len  = UBIFS_INO_NODE_SZ;
        c->ranges[UBIFS_INO_NODE].max_len  = UBIFS_MAX_INO_NODE_SZ;
@@ -816,6 +819,9 @@ static int alloc_wbufs(struct ubifs_info *c)
                c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback;
                c->jheads[i].wbuf.jhead = i;
                c->jheads[i].grouped = 1;
+               c->jheads[i].log_hash = ubifs_hash_get_desc(c);
+               if (IS_ERR(c->jheads[i].log_hash))
+                       goto out;
        }
 
        /*
@@ -826,6 +832,12 @@ static int alloc_wbufs(struct ubifs_info *c)
        c->jheads[GCHD].grouped = 0;
 
        return 0;
+
+out:
+       while (i--)
+               kfree(c->jheads[i].log_hash);
+
+       return err;
 }
 
 /**
@@ -840,6 +852,7 @@ static void free_wbufs(struct ubifs_info *c)
                for (i = 0; i < c->jhead_cnt; i++) {
                        kfree(c->jheads[i].wbuf.buf);
                        kfree(c->jheads[i].wbuf.inodes);
+                       kfree(c->jheads[i].log_hash);
                }
                kfree(c->jheads);
                c->jheads = NULL;
@@ -924,6 +937,8 @@ static int check_volume_empty(struct ubifs_info *c)
  * Opt_no_chk_data_crc: do not check CRCs when reading data nodes
  * Opt_override_compr: override default compressor
  * Opt_assert: set ubifs_assert() action
+ * Opt_auth_key: The key name used for authentication
+ * Opt_auth_hash_name: The hash type used for authentication
  * Opt_err: just end of array marker
  */
 enum {
@@ -935,6 +950,8 @@ enum {
        Opt_no_chk_data_crc,
        Opt_override_compr,
        Opt_assert,
+       Opt_auth_key,
+       Opt_auth_hash_name,
        Opt_ignore,
        Opt_err,
 };
@@ -947,6 +964,8 @@ static const match_table_t tokens = {
        {Opt_chk_data_crc, "chk_data_crc"},
        {Opt_no_chk_data_crc, "no_chk_data_crc"},
        {Opt_override_compr, "compr=%s"},
+       {Opt_auth_key, "auth_key=%s"},
+       {Opt_auth_hash_name, "auth_hash_name=%s"},
        {Opt_ignore, "ubi=%s"},
        {Opt_ignore, "vol=%s"},
        {Opt_assert, "assert=%s"},
@@ -1070,6 +1089,16 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
                        kfree(act);
                        break;
                }
+               case Opt_auth_key:
+                       c->auth_key_name = kstrdup(args[0].from, GFP_KERNEL);
+                       if (!c->auth_key_name)
+                               return -ENOMEM;
+                       break;
+               case Opt_auth_hash_name:
+                       c->auth_hash_name = kstrdup(args[0].from, GFP_KERNEL);
+                       if (!c->auth_hash_name)
+                               return -ENOMEM;
+                       break;
                case Opt_ignore:
                        break;
                default:
@@ -1249,6 +1278,19 @@ static int mount_ubifs(struct ubifs_info *c)
 
        c->mounting = 1;
 
+       if (c->auth_key_name) {
+               if (IS_ENABLED(CONFIG_UBIFS_FS_AUTHENTICATION)) {
+                       err = ubifs_init_authentication(c);
+                       if (err)
+                               goto out_free;
+               } else {
+                       ubifs_err(c, "auth_key_name, but UBIFS is built without"
+                                 " authentication support");
+                       err = -EINVAL;
+                       goto out_free;
+               }
+       }
+
        err = ubifs_read_superblock(c);
        if (err)
                goto out_free;
@@ -1367,12 +1409,21 @@ static int mount_ubifs(struct ubifs_info *c)
                }
 
                if (c->need_recovery) {
-                       err = ubifs_recover_size(c);
-                       if (err)
-                               goto out_orphans;
+                       if (!ubifs_authenticated(c)) {
+                               err = ubifs_recover_size(c, true);
+                               if (err)
+                                       goto out_orphans;
+                       }
+
                        err = ubifs_rcvry_gc_commit(c);
                        if (err)
                                goto out_orphans;
+
+                       if (ubifs_authenticated(c)) {
+                               err = ubifs_recover_size(c, false);
+                               if (err)
+                                       goto out_orphans;
+                       }
                } else {
                        err = take_gc_lnum(c);
                        if (err)
@@ -1391,7 +1442,7 @@ static int mount_ubifs(struct ubifs_info *c)
                if (err)
                        goto out_orphans;
        } else if (c->need_recovery) {
-               err = ubifs_recover_size(c);
+               err = ubifs_recover_size(c, false);
                if (err)
                        goto out_orphans;
        } else {
@@ -1557,7 +1608,10 @@ static void ubifs_umount(struct ubifs_info *c)
        free_wbufs(c);
        free_orphans(c);
        ubifs_lpt_free(c, 0);
+       ubifs_exit_authentication(c);
 
+       kfree(c->auth_key_name);
+       kfree(c->auth_hash_name);
        kfree(c->cbuf);
        kfree(c->rcvrd_mst_node);
        kfree(c->mst_node);
@@ -1605,16 +1659,10 @@ static int ubifs_remount_rw(struct ubifs_info *c)
                goto out;
 
        if (c->old_leb_cnt != c->leb_cnt) {
-               struct ubifs_sb_node *sup;
+               struct ubifs_sb_node *sup = c->sup_node;
 
-               sup = ubifs_read_sb_node(c);
-               if (IS_ERR(sup)) {
-                       err = PTR_ERR(sup);
-                       goto out;
-               }
                sup->leb_cnt = cpu_to_le32(c->leb_cnt);
                err = ubifs_write_sb_node(c, sup);
-               kfree(sup);
                if (err)
                        goto out;
        }
@@ -1624,9 +1672,11 @@ static int ubifs_remount_rw(struct ubifs_info *c)
                err = ubifs_write_rcvrd_mst_node(c);
                if (err)
                        goto out;
-               err = ubifs_recover_size(c);
-               if (err)
-                       goto out;
+               if (!ubifs_authenticated(c)) {
+                       err = ubifs_recover_size(c, true);
+                       if (err)
+                               goto out;
+               }
                err = ubifs_clean_lebs(c, c->sbuf);
                if (err)
                        goto out;
@@ -1692,10 +1742,19 @@ static int ubifs_remount_rw(struct ubifs_info *c)
                        goto out;
        }
 
-       if (c->need_recovery)
+       if (c->need_recovery) {
                err = ubifs_rcvry_gc_commit(c);
-       else
+               if (err)
+                       goto out;
+
+               if (ubifs_authenticated(c)) {
+                       err = ubifs_recover_size(c, false);
+                       if (err)
+                               goto out;
+               }
+       } else {
                err = ubifs_leb_unmap(c, c->gc_lnum);
+       }
        if (err)
                goto out;
 
index bf416e5127431aae03a6211bbd89ab069bb7ceda..25572ffea1634964785aee3b05590cf439dddd6d 100644 (file)
@@ -35,7 +35,7 @@
 #include "ubifs.h"
 
 static int try_read_node(const struct ubifs_info *c, void *buf, int type,
-                        int len, int lnum, int offs);
+                        struct ubifs_zbranch *zbr);
 static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
                              struct ubifs_zbranch *zbr, void *node);
 
@@ -433,9 +433,7 @@ static int tnc_read_hashed_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
  * @c: UBIFS file-system description object
  * @buf: buffer to read to
  * @type: node type
- * @len: node length (not aligned)
- * @lnum: LEB number of node to read
- * @offs: offset of node to read
+ * @zbr: the zbranch describing the node to read
  *
  * This function tries to read a node of known type and length, checks it and
  * stores it in @buf. This function returns %1 if a node is present and %0 if
@@ -453,8 +451,11 @@ static int tnc_read_hashed_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
  * journal nodes may potentially be corrupted, so checking is required.
  */
 static int try_read_node(const struct ubifs_info *c, void *buf, int type,
-                        int len, int lnum, int offs)
+                        struct ubifs_zbranch *zbr)
 {
+       int len = zbr->len;
+       int lnum = zbr->lnum;
+       int offs = zbr->offs;
        int err, node_len;
        struct ubifs_ch *ch = buf;
        uint32_t crc, node_crc;
@@ -487,6 +488,12 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
        if (crc != node_crc)
                return 0;
 
+       err = ubifs_node_check_hash(c, buf, zbr->hash);
+       if (err) {
+               ubifs_bad_hash(c, buf, zbr->hash, lnum, offs);
+               return 0;
+       }
+
        return 1;
 }
 
@@ -507,8 +514,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
 
        dbg_tnck(key, "LEB %d:%d, key ", zbr->lnum, zbr->offs);
 
-       ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum,
-                           zbr->offs);
+       ret = try_read_node(c, node, key_type(c, key), zbr);
        if (ret == 1) {
                union ubifs_key node_key;
                struct ubifs_dent_node *dent = node;
@@ -1713,6 +1719,12 @@ static int validate_data_node(struct ubifs_info *c, void *buf,
                goto out;
        }
 
+       err = ubifs_node_check_hash(c, buf, zbr->hash);
+       if (err) {
+               ubifs_bad_hash(c, buf, zbr->hash, zbr->lnum, zbr->offs);
+               return err;
+       }
+
        len = le32_to_cpu(ch->len);
        if (len != zbr->len) {
                ubifs_err(c, "bad node length %d, expected %d", len, zbr->len);
@@ -2260,13 +2272,14 @@ do_split:
  * @lnum: LEB number of node
  * @offs: node offset
  * @len: node length
+ * @hash: The hash over the node
  *
  * This function adds a node with key @key to TNC. The node may be new or it may
  * obsolete some existing one. Returns %0 on success or negative error code on
  * failure.
  */
 int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum,
-                 int offs, int len)
+                 int offs, int len, const u8 *hash)
 {
        int found, n, err = 0;
        struct ubifs_znode *znode;
@@ -2281,6 +2294,7 @@ int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum,
                zbr.lnum = lnum;
                zbr.offs = offs;
                zbr.len = len;
+               ubifs_copy_hash(c, hash, zbr.hash);
                key_copy(c, key, &zbr.key);
                err = tnc_insert(c, znode, &zbr, n + 1);
        } else if (found == 1) {
@@ -2291,6 +2305,7 @@ int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum,
                zbr->lnum = lnum;
                zbr->offs = offs;
                zbr->len = len;
+               ubifs_copy_hash(c, hash, zbr->hash);
        } else
                err = found;
        if (!err)
@@ -2392,13 +2407,14 @@ out_unlock:
  * @lnum: LEB number of node
  * @offs: node offset
  * @len: node length
+ * @hash: The hash over the node
  * @nm: node name
  *
  * This is the same as 'ubifs_tnc_add()' but it should be used with keys which
  * may have collisions, like directory entry keys.
  */
 int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
-                    int lnum, int offs, int len,
+                    int lnum, int offs, int len, const u8 *hash,
                     const struct fscrypt_name *nm)
 {
        int found, n, err = 0;
@@ -2441,6 +2457,7 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
                        zbr->lnum = lnum;
                        zbr->offs = offs;
                        zbr->len = len;
+                       ubifs_copy_hash(c, hash, zbr->hash);
                        goto out_unlock;
                }
        }
@@ -2452,6 +2469,7 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
                zbr.lnum = lnum;
                zbr.offs = offs;
                zbr.len = len;
+               ubifs_copy_hash(c, hash, zbr.hash);
                key_copy(c, key, &zbr.key);
                err = tnc_insert(c, znode, &zbr, n + 1);
                if (err)
index dba87d09b989375cea13b168bd43977cd43e60ef..dbcd2c350b65223209b95deeb657ae991e8181ab 100644 (file)
@@ -38,6 +38,7 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
                         struct ubifs_znode *znode, int lnum, int offs, int len)
 {
        struct ubifs_znode *zp;
+       u8 hash[UBIFS_HASH_ARR_SZ];
        int i, err;
 
        /* Make index node */
@@ -52,6 +53,7 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
                br->lnum = cpu_to_le32(zbr->lnum);
                br->offs = cpu_to_le32(zbr->offs);
                br->len = cpu_to_le32(zbr->len);
+               ubifs_copy_hash(c, zbr->hash, ubifs_branch_hash(c, br));
                if (!zbr->lnum || !zbr->len) {
                        ubifs_err(c, "bad ref in znode");
                        ubifs_dump_znode(c, znode);
@@ -62,6 +64,7 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
                }
        }
        ubifs_prepare_node(c, idx, len, 0);
+       ubifs_node_calc_hash(c, idx, hash);
 
        znode->lnum = lnum;
        znode->offs = offs;
@@ -78,10 +81,12 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
                zbr->lnum = lnum;
                zbr->offs = offs;
                zbr->len = len;
+               ubifs_copy_hash(c, hash, zbr->hash);
        } else {
                c->zroot.lnum = lnum;
                c->zroot.offs = offs;
                c->zroot.len = len;
+               ubifs_copy_hash(c, hash, c->zroot.hash);
        }
        c->calc_idx_sz += ALIGN(len, 8);
 
@@ -647,6 +652,8 @@ static int get_znodes_to_commit(struct ubifs_info *c)
                        znode->cnext = c->cnext;
                        break;
                }
+               znode->cparent = znode->parent;
+               znode->ciip = znode->iip;
                znode->cnext = cnext;
                znode = cnext;
                cnt += 1;
@@ -840,6 +847,8 @@ static int write_index(struct ubifs_info *c)
        }
 
        while (1) {
+               u8 hash[UBIFS_HASH_ARR_SZ];
+
                cond_resched();
 
                znode = cnext;
@@ -857,6 +866,7 @@ static int write_index(struct ubifs_info *c)
                        br->lnum = cpu_to_le32(zbr->lnum);
                        br->offs = cpu_to_le32(zbr->offs);
                        br->len = cpu_to_le32(zbr->len);
+                       ubifs_copy_hash(c, zbr->hash, ubifs_branch_hash(c, br));
                        if (!zbr->lnum || !zbr->len) {
                                ubifs_err(c, "bad ref in znode");
                                ubifs_dump_znode(c, znode);
@@ -868,6 +878,23 @@ static int write_index(struct ubifs_info *c)
                }
                len = ubifs_idx_node_sz(c, znode->child_cnt);
                ubifs_prepare_node(c, idx, len, 0);
+               ubifs_node_calc_hash(c, idx, hash);
+
+               mutex_lock(&c->tnc_mutex);
+
+               if (znode->cparent)
+                       ubifs_copy_hash(c, hash,
+                                       znode->cparent->zbranch[znode->ciip].hash);
+
+               if (znode->parent) {
+                       if (!ubifs_zn_obsolete(znode))
+                               ubifs_copy_hash(c, hash,
+                                       znode->parent->zbranch[znode->iip].hash);
+               } else {
+                       ubifs_copy_hash(c, hash, c->zroot.hash);
+               }
+
+               mutex_unlock(&c->tnc_mutex);
 
                /* Determine the index node position */
                if (lnum == -1) {
index d90ee01076a9ea5559dd8afb26cffc4e61a5cb81..d1815e9590071940059186923bb4dc21e8bb9cce 100644 (file)
@@ -265,9 +265,7 @@ long ubifs_destroy_tnc_subtree(const struct ubifs_info *c,
 /**
  * read_znode - read an indexing node from flash and fill znode.
  * @c: UBIFS file-system description object
- * @lnum: LEB of the indexing node to read
- * @offs: node offset
- * @len: node length
+ * @zzbr: the zbranch describing the node to read
  * @znode: znode to read to
  *
  * This function reads an indexing node from the flash media and fills znode
@@ -276,9 +274,12 @@ long ubifs_destroy_tnc_subtree(const struct ubifs_info *c,
  * is wrong with it, this function prints complaint messages and returns
  * %-EINVAL.
  */
-static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
+static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr,
                      struct ubifs_znode *znode)
 {
+       int lnum = zzbr->lnum;
+       int offs = zzbr->offs;
+       int len = zzbr->len;
        int i, err, type, cmp;
        struct ubifs_idx_node *idx;
 
@@ -292,6 +293,12 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
                return err;
        }
 
+       err = ubifs_node_check_hash(c, idx, zzbr->hash);
+       if (err) {
+               ubifs_bad_hash(c, idx, zzbr->hash, lnum, offs);
+               return err;
+       }
+
        znode->child_cnt = le16_to_cpu(idx->child_cnt);
        znode->level = le16_to_cpu(idx->level);
 
@@ -308,13 +315,14 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
        }
 
        for (i = 0; i < znode->child_cnt; i++) {
-               const struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
+               struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
                struct ubifs_zbranch *zbr = &znode->zbranch[i];
 
                key_read(c, &br->key, &zbr->key);
                zbr->lnum = le32_to_cpu(br->lnum);
                zbr->offs = le32_to_cpu(br->offs);
                zbr->len  = le32_to_cpu(br->len);
+               ubifs_copy_hash(c, ubifs_branch_hash(c, br), zbr->hash);
                zbr->znode = NULL;
 
                /* Validate branch */
@@ -425,7 +433,7 @@ struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c,
        if (!znode)
                return ERR_PTR(-ENOMEM);
 
-       err = read_znode(c, zbr->lnum, zbr->offs, zbr->len, znode);
+       err = read_znode(c, zbr, znode);
        if (err)
                goto out;
 
@@ -496,5 +504,11 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
                return -EINVAL;
        }
 
+       err = ubifs_node_check_hash(c, node, zbr->hash);
+       if (err) {
+               ubifs_bad_hash(c, node, zbr->hash, zbr->lnum, zbr->offs);
+               return err;
+       }
+
        return 0;
 }
index e8c23c9d4f4a7c67aef3f94092038e8fa58fbfb0..8b7c1844014ffbbeb1dcc93ce4be8dc1cbc65742 100644 (file)
@@ -286,6 +286,7 @@ enum {
 #define UBIFS_IDX_NODE_SZ  sizeof(struct ubifs_idx_node)
 #define UBIFS_CS_NODE_SZ   sizeof(struct ubifs_cs_node)
 #define UBIFS_ORPH_NODE_SZ sizeof(struct ubifs_orph_node)
+#define UBIFS_AUTH_NODE_SZ sizeof(struct ubifs_auth_node)
 /* Extended attribute entry nodes are identical to directory entry nodes */
 #define UBIFS_XENT_NODE_SZ UBIFS_DENT_NODE_SZ
 /* Only this does not have to be multiple of 8 bytes */
@@ -300,6 +301,12 @@ enum {
 /* The largest UBIFS node */
 #define UBIFS_MAX_NODE_SZ UBIFS_MAX_INO_NODE_SZ
 
+/* The maximum size of a hash, enough for sha512 */
+#define UBIFS_MAX_HASH_LEN 64
+
+/* The maximum size of a hmac, enough for hmac(sha512) */
+#define UBIFS_MAX_HMAC_LEN 64
+
 /*
  * xattr name of UBIFS encryption context, we don't use a prefix
  * nor a long name to not waste space on the flash.
@@ -365,6 +372,7 @@ enum {
  * UBIFS_IDX_NODE: index node
  * UBIFS_CS_NODE: commit start node
  * UBIFS_ORPH_NODE: orphan node
+ * UBIFS_AUTH_NODE: authentication node
  * UBIFS_NODE_TYPES_CNT: count of supported node types
  *
  * Note, we index arrays by these numbers, so keep them low and contiguous.
@@ -384,6 +392,7 @@ enum {
        UBIFS_IDX_NODE,
        UBIFS_CS_NODE,
        UBIFS_ORPH_NODE,
+       UBIFS_AUTH_NODE,
        UBIFS_NODE_TYPES_CNT,
 };
 
@@ -421,15 +430,19 @@ enum {
  * UBIFS_FLG_DOUBLE_HASH: store a 32bit cookie in directory entry nodes to
  *                       support 64bit cookies for lookups by hash
  * UBIFS_FLG_ENCRYPTION: this filesystem contains encrypted files
+ * UBIFS_FLG_AUTHENTICATION: this filesystem contains hashes for authentication
  */
 enum {
        UBIFS_FLG_BIGLPT = 0x02,
        UBIFS_FLG_SPACE_FIXUP = 0x04,
        UBIFS_FLG_DOUBLE_HASH = 0x08,
        UBIFS_FLG_ENCRYPTION = 0x10,
+       UBIFS_FLG_AUTHENTICATION = 0x20,
 };
 
-#define UBIFS_FLG_MASK (UBIFS_FLG_BIGLPT|UBIFS_FLG_SPACE_FIXUP|UBIFS_FLG_DOUBLE_HASH|UBIFS_FLG_ENCRYPTION)
+#define UBIFS_FLG_MASK (UBIFS_FLG_BIGLPT | UBIFS_FLG_SPACE_FIXUP | \
+               UBIFS_FLG_DOUBLE_HASH | UBIFS_FLG_ENCRYPTION | \
+               UBIFS_FLG_AUTHENTICATION)
 
 /**
  * struct ubifs_ch - common header node.
@@ -633,6 +646,10 @@ struct ubifs_pad_node {
  * @time_gran: time granularity in nanoseconds
  * @uuid: UUID generated when the file system image was created
  * @ro_compat_version: UBIFS R/O compatibility version
+ * @hmac: HMAC to authenticate the superblock node
+ * @hmac_wkm: HMAC of a well known message (the string "UBIFS") as a convenience
+ *            to the user to check if the correct key is passed.
+ * @hash_algo: The hash algo used for this filesystem (one of enum hash_algo)
  */
 struct ubifs_sb_node {
        struct ubifs_ch ch;
@@ -660,7 +677,10 @@ struct ubifs_sb_node {
        __le32 time_gran;
        __u8 uuid[16];
        __le32 ro_compat_version;
-       __u8 padding2[3968];
+       __u8 hmac[UBIFS_MAX_HMAC_LEN];
+       __u8 hmac_wkm[UBIFS_MAX_HMAC_LEN];
+       __le16 hash_algo;
+       __u8 padding2[3838];
 } __packed;
 
 /**
@@ -695,6 +715,9 @@ struct ubifs_sb_node {
  * @empty_lebs: number of empty logical eraseblocks
  * @idx_lebs: number of indexing logical eraseblocks
  * @leb_cnt: count of LEBs used by file-system
+ * @hash_root_idx: the hash of the root index node
+ * @hash_lpt: the hash of the LPT
+ * @hmac: HMAC to authenticate the master node
  * @padding: reserved for future, zeroes
  */
 struct ubifs_mst_node {
@@ -727,7 +750,10 @@ struct ubifs_mst_node {
        __le32 empty_lebs;
        __le32 idx_lebs;
        __le32 leb_cnt;
-       __u8 padding[344];
+       __u8 hash_root_idx[UBIFS_MAX_HASH_LEN];
+       __u8 hash_lpt[UBIFS_MAX_HASH_LEN];
+       __u8 hmac[UBIFS_MAX_HMAC_LEN];
+       __u8 padding[152];
 } __packed;
 
 /**
@@ -746,12 +772,26 @@ struct ubifs_ref_node {
        __u8 padding[28];
 } __packed;
 
+/**
+ * struct ubifs_auth_node - node for authenticating other nodes
+ * @ch: common header
+ * @hmac: The HMAC
+ */
+struct ubifs_auth_node {
+       struct ubifs_ch ch;
+       __u8 hmac[];
+} __packed;
+
 /**
  * struct ubifs_branch - key/reference/length branch
  * @lnum: LEB number of the target node
  * @offs: offset within @lnum
  * @len: target node length
  * @key: key
+ *
+ * In an authenticated UBIFS we have the hash of the referenced node after @key.
+ * This can't be added to the struct type definition because @key is a
+ * dynamically sized element already.
  */
 struct ubifs_branch {
        __le32 lnum;
index 4368cde476b0fa4360ab942790fb70ff62937530..38401adaa00d63a2f975e9900143d7a061336826 100644 (file)
@@ -39,6 +39,9 @@
 #include <linux/security.h>
 #include <linux/xattr.h>
 #include <linux/random.h>
+#include <crypto/hash_info.h>
+#include <crypto/hash.h>
+#include <crypto/algapi.h>
 
 #define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_UBIFS_FS_ENCRYPTION)
 #include <linux/fscrypt.h>
 /* Maximum number of data nodes to bulk-read */
 #define UBIFS_MAX_BULK_READ 32
 
+#ifdef CONFIG_UBIFS_FS_AUTHENTICATION
+#define UBIFS_HASH_ARR_SZ UBIFS_MAX_HASH_LEN
+#define UBIFS_HMAC_ARR_SZ UBIFS_MAX_HMAC_LEN
+#else
+#define UBIFS_HASH_ARR_SZ 0
+#define UBIFS_HMAC_ARR_SZ 0
+#endif
+
 /*
  * Lockdep classes for UBIFS inode @ui_mutex.
  */
@@ -706,6 +717,7 @@ struct ubifs_wbuf {
  * @jhead: journal head number this bud belongs to
  * @list: link in the list buds belonging to the same journal head
  * @rb: link in the tree of all buds
+ * @log_hash: the log hash from the commit start node up to this bud
  */
 struct ubifs_bud {
        int lnum;
@@ -713,6 +725,7 @@ struct ubifs_bud {
        int jhead;
        struct list_head list;
        struct rb_node rb;
+       struct shash_desc *log_hash;
 };
 
 /**
@@ -720,6 +733,7 @@ struct ubifs_bud {
  * @wbuf: head's write-buffer
  * @buds_list: list of bud LEBs belonging to this journal head
  * @grouped: non-zero if UBIFS groups nodes when writing to this journal head
+ * @log_hash: the log hash from the commit start node up to this journal head
  *
  * Note, the @buds list is protected by the @c->buds_lock.
  */
@@ -727,6 +741,7 @@ struct ubifs_jhead {
        struct ubifs_wbuf wbuf;
        struct list_head buds_list;
        unsigned int grouped:1;
+       struct shash_desc *log_hash;
 };
 
 /**
@@ -736,6 +751,7 @@ struct ubifs_jhead {
  * @lnum: LEB number of the target node (indexing node or data node)
  * @offs: target node offset within @lnum
  * @len: target node length
+ * @hash: the hash of the target node
  */
 struct ubifs_zbranch {
        union ubifs_key key;
@@ -746,12 +762,15 @@ struct ubifs_zbranch {
        int lnum;
        int offs;
        int len;
+       u8 hash[UBIFS_HASH_ARR_SZ];
 };
 
 /**
  * struct ubifs_znode - in-memory representation of an indexing node.
  * @parent: parent znode or NULL if it is the root
  * @cnext: next znode to commit
+ * @cparent: parent node for this commit
+ * @ciip: index in cparent's zbranch array
  * @flags: znode flags (%DIRTY_ZNODE, %COW_ZNODE or %OBSOLETE_ZNODE)
  * @time: last access time (seconds)
  * @level: level of the entry in the TNC tree
@@ -769,6 +788,8 @@ struct ubifs_zbranch {
 struct ubifs_znode {
        struct ubifs_znode *parent;
        struct ubifs_znode *cnext;
+       struct ubifs_znode *cparent;
+       int ciip;
        unsigned long flags;
        time64_t time;
        int level;
@@ -983,6 +1004,7 @@ struct ubifs_debug_info;
  * struct ubifs_info - UBIFS file-system description data structure
  * (per-superblock).
  * @vfs_sb: VFS @struct super_block object
+ * @sup_node: The super block node as read from the device
  *
  * @highest_inum: highest used inode number
  * @max_sqnum: current global sequence number
@@ -1028,6 +1050,7 @@ struct ubifs_debug_info;
  * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
  * @rw_incompat: the media is not R/W compatible
  * @assert_action: action to take when a ubifs_assert() fails
+ * @authenticated: flag indicating the FS is mounted in authenticated mode
  *
  * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
  *             @calc_idx_sz
@@ -1075,6 +1098,7 @@ struct ubifs_debug_info;
  * @key_hash: direntry key hash function
  * @key_fmt: key format
  * @key_len: key length
+ * @hash_len: The length of the index node hashes
  * @fanout: fanout of the index tree (number of links per indexing node)
  *
  * @min_io_size: minimal input/output unit size
@@ -1210,6 +1234,15 @@ struct ubifs_debug_info;
  * @rp_uid: reserved pool user ID
  * @rp_gid: reserved pool group ID
  *
+ * @hash_tfm: the hash transformation used for hashing nodes
+ * @hmac_tfm: the HMAC transformation for this filesystem
+ * @hmac_desc_len: length of the HMAC used for authentication
+ * @auth_key_name: the authentication key name
+ * @auth_hash_name: the name of the hash algorithm used for authentication
+ * @auth_hash_algo: the authentication hash used for this fs
+ * @log_hash: the log hash from the commit start node up to the latest reference
+ *            node.
+ *
  * @empty: %1 if the UBI device is empty
  * @need_recovery: %1 if the file-system needs recovery
  * @replaying: %1 during journal replay
@@ -1230,6 +1263,7 @@ struct ubifs_debug_info;
  */
 struct ubifs_info {
        struct super_block *vfs_sb;
+       struct ubifs_sb_node *sup_node;
 
        ino_t highest_inum;
        unsigned long long max_sqnum;
@@ -1270,6 +1304,7 @@ struct ubifs_info {
        unsigned int default_compr:2;
        unsigned int rw_incompat:1;
        unsigned int assert_action:2;
+       unsigned int authenticated:1;
 
        struct mutex tnc_mutex;
        struct ubifs_zbranch zroot;
@@ -1314,6 +1349,7 @@ struct ubifs_info {
        uint32_t (*key_hash)(const char *str, int len);
        int key_fmt;
        int key_len;
+       int hash_len;
        int fanout;
 
        int min_io_size;
@@ -1441,6 +1477,15 @@ struct ubifs_info {
        kuid_t rp_uid;
        kgid_t rp_gid;
 
+       struct crypto_shash *hash_tfm;
+       struct crypto_shash *hmac_tfm;
+       int hmac_desc_len;
+       char *auth_key_name;
+       char *auth_hash_name;
+       enum hash_algo auth_hash_algo;
+
+       struct shash_desc *log_hash;
+
        /* The below fields are used only during mounting and re-mounting */
        unsigned int empty:1;
        unsigned int need_recovery:1;
@@ -1471,6 +1516,195 @@ extern const struct inode_operations ubifs_dir_inode_operations;
 extern const struct inode_operations ubifs_symlink_inode_operations;
 extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
 
+/* auth.c */
+static inline int ubifs_authenticated(const struct ubifs_info *c)
+{
+       return (IS_ENABLED(CONFIG_UBIFS_FS_AUTHENTICATION)) && c->authenticated;
+}
+
+struct shash_desc *__ubifs_hash_get_desc(const struct ubifs_info *c);
+static inline struct shash_desc *ubifs_hash_get_desc(const struct ubifs_info *c)
+{
+       return ubifs_authenticated(c) ? __ubifs_hash_get_desc(c) : NULL;
+}
+
+static inline int ubifs_shash_init(const struct ubifs_info *c,
+                                  struct shash_desc *desc)
+{
+       if (ubifs_authenticated(c))
+               return crypto_shash_init(desc);
+       else
+               return 0;
+}
+
+static inline int ubifs_shash_update(const struct ubifs_info *c,
+                                     struct shash_desc *desc, const void *buf,
+                                     unsigned int len)
+{
+       int err = 0;
+
+       if (ubifs_authenticated(c)) {
+               err = crypto_shash_update(desc, buf, len);
+               if (err < 0)
+                       return err;
+       }
+
+       return 0;
+}
+
+static inline int ubifs_shash_final(const struct ubifs_info *c,
+                                   struct shash_desc *desc, u8 *out)
+{
+       return ubifs_authenticated(c) ? crypto_shash_final(desc, out) : 0;
+}
+
+int __ubifs_node_calc_hash(const struct ubifs_info *c, const void *buf,
+                         u8 *hash);
+static inline int ubifs_node_calc_hash(const struct ubifs_info *c,
+                                       const void *buf, u8 *hash)
+{
+       if (ubifs_authenticated(c))
+               return __ubifs_node_calc_hash(c, buf, hash);
+       else
+               return 0;
+}
+
+int ubifs_prepare_auth_node(struct ubifs_info *c, void *node,
+                            struct shash_desc *inhash);
+
+/**
+ * ubifs_check_hash - compare two hashes
+ * @c: UBIFS file-system description object
+ * @expected: first hash
+ * @got: second hash
+ *
+ * Compare two hashes @expected and @got. Returns 0 when they are equal, a
+ * negative error code otherwise.
+ */
+static inline int ubifs_check_hash(const struct ubifs_info *c,
+                                  const u8 *expected, const u8 *got)
+{
+       return crypto_memneq(expected, got, c->hash_len);
+}
+
+/**
+ * ubifs_check_hmac - compare two HMACs
+ * @c: UBIFS file-system description object
+ * @expected: first HMAC
+ * @got: second HMAC
+ *
+ * Compare two hashes @expected and @got. Returns 0 when they are equal, a
+ * negative error code otherwise.
+ */
+static inline int ubifs_check_hmac(const struct ubifs_info *c,
+                                  const u8 *expected, const u8 *got)
+{
+       return crypto_memneq(expected, got, c->hmac_desc_len);
+}
+
+void ubifs_bad_hash(const struct ubifs_info *c, const void *node,
+                   const u8 *hash, int lnum, int offs);
+
+int __ubifs_node_check_hash(const struct ubifs_info *c, const void *buf,
+                         const u8 *expected);
+static inline int ubifs_node_check_hash(const struct ubifs_info *c,
+                                       const void *buf, const u8 *expected)
+{
+       if (ubifs_authenticated(c))
+               return __ubifs_node_check_hash(c, buf, expected);
+       else
+               return 0;
+}
+
+int ubifs_init_authentication(struct ubifs_info *c);
+void __ubifs_exit_authentication(struct ubifs_info *c);
+static inline void ubifs_exit_authentication(struct ubifs_info *c)
+{
+       if (ubifs_authenticated(c))
+               __ubifs_exit_authentication(c);
+}
+
+/**
+ * ubifs_branch_hash - returns a pointer to the hash of a branch
+ * @c: UBIFS file-system description object
+ * @br: branch to get the hash from
+ *
+ * This returns a pointer to the hash of a branch. Since the key already is a
+ * dynamically sized object we cannot use a struct member here.
+ */
+static inline u8 *ubifs_branch_hash(struct ubifs_info *c,
+                                   struct ubifs_branch *br)
+{
+       return (void *)br + sizeof(*br) + c->key_len;
+}
+
+/**
+ * ubifs_copy_hash - copy a hash
+ * @c: UBIFS file-system description object
+ * @from: source hash
+ * @to: destination hash
+ *
+ * With authentication this copies a hash, otherwise does nothing.
+ */
+static inline void ubifs_copy_hash(const struct ubifs_info *c, const u8 *from,
+                                  u8 *to)
+{
+       if (ubifs_authenticated(c))
+               memcpy(to, from, c->hash_len);
+}
+
+int __ubifs_node_insert_hmac(const struct ubifs_info *c, void *buf,
+                             int len, int ofs_hmac);
+static inline int ubifs_node_insert_hmac(const struct ubifs_info *c, void *buf,
+                                         int len, int ofs_hmac)
+{
+       if (ubifs_authenticated(c))
+               return __ubifs_node_insert_hmac(c, buf, len, ofs_hmac);
+       else
+               return 0;
+}
+
+int __ubifs_node_verify_hmac(const struct ubifs_info *c, const void *buf,
+                            int len, int ofs_hmac);
+static inline int ubifs_node_verify_hmac(const struct ubifs_info *c,
+                                        const void *buf, int len, int ofs_hmac)
+{
+       if (ubifs_authenticated(c))
+               return __ubifs_node_verify_hmac(c, buf, len, ofs_hmac);
+       else
+               return 0;
+}
+
+/**
+ * ubifs_auth_node_sz - returns the size of an authentication node
+ * @c: UBIFS file-system description object
+ *
+ * This function returns the size of an authentication node which can
+ * be 0 for unauthenticated filesystems or the real size of an auth node
+ * authentication is enabled.
+ */
+static inline int ubifs_auth_node_sz(const struct ubifs_info *c)
+{
+       if (ubifs_authenticated(c))
+               return sizeof(struct ubifs_auth_node) + c->hmac_desc_len;
+       else
+               return 0;
+}
+
+int ubifs_hmac_wkm(struct ubifs_info *c, u8 *hmac);
+
+int __ubifs_shash_copy_state(const struct ubifs_info *c, struct shash_desc *src,
+                            struct shash_desc *target);
+static inline int ubifs_shash_copy_state(const struct ubifs_info *c,
+                                          struct shash_desc *src,
+                                          struct shash_desc *target)
+{
+       if (ubifs_authenticated(c))
+               return __ubifs_shash_copy_state(c, src, target);
+       else
+               return 0;
+}
+
 /* io.c */
 void ubifs_ro_mode(struct ubifs_info *c, int err);
 int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs,
@@ -1490,9 +1724,15 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
                         int lnum, int offs);
 int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum,
                     int offs);
+int ubifs_write_node_hmac(struct ubifs_info *c, void *buf, int len, int lnum,
+                         int offs, int hmac_offs);
 int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
                     int offs, int quiet, int must_chk_crc);
+void ubifs_init_node(struct ubifs_info *c, void *buf, int len, int pad);
+void ubifs_crc_node(struct ubifs_info *c, void *buf, int len);
 void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad);
+int ubifs_prepare_node_hmac(struct ubifs_info *c, void *node, int len,
+                           int hmac_offs, int pad);
 void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last);
 int ubifs_io_init(struct ubifs_info *c);
 void ubifs_pad(const struct ubifs_info *c, void *buf, int pad);
@@ -1592,11 +1832,12 @@ int ubifs_tnc_lookup_dh(struct ubifs_info *c, const union ubifs_key *key,
 int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
                     void *node, int *lnum, int *offs);
 int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum,
-                 int offs, int len);
+                 int offs, int len, const u8 *hash);
 int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key,
                      int old_lnum, int old_offs, int lnum, int offs, int len);
 int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
-                    int lnum, int offs, int len, const struct fscrypt_name *nm);
+                    int lnum, int offs, int len, const u8 *hash,
+                    const struct fscrypt_name *nm);
 int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key);
 int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
                        const struct fscrypt_name *nm);
@@ -1659,12 +1900,12 @@ int ubifs_gc_should_commit(struct ubifs_info *c);
 void ubifs_wait_for_commit(struct ubifs_info *c);
 
 /* master.c */
+int ubifs_compare_master_node(struct ubifs_info *c, void *m1, void *m2);
 int ubifs_read_master(struct ubifs_info *c);
 int ubifs_write_master(struct ubifs_info *c);
 
 /* sb.c */
 int ubifs_read_superblock(struct ubifs_info *c);
-struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c);
 int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup);
 int ubifs_fixup_free_space(struct ubifs_info *c);
 int ubifs_enable_encryption(struct ubifs_info *c);
@@ -1693,7 +1934,7 @@ int ubifs_clear_orphans(struct ubifs_info *c);
 /* lpt.c */
 int ubifs_calc_lpt_geom(struct ubifs_info *c);
 int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
-                         int *lpt_lebs, int *big_lpt);
+                         int *lpt_lebs, int *big_lpt, u8 *hash);
 int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr);
 struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum);
 struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum);
@@ -1712,6 +1953,7 @@ struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c,
                                    struct ubifs_nnode *parent, int iip);
 struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c,
                                    struct ubifs_nnode *parent, int iip);
+struct ubifs_pnode *ubifs_pnode_lookup(struct ubifs_info *c, int i);
 int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip);
 void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty);
 void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode);
@@ -1720,6 +1962,7 @@ struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght);
 /* Needed only in debugging code in lpt_commit.c */
 int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf,
                       struct ubifs_nnode *nnode);
+int ubifs_lpt_calc_hash(struct ubifs_info *c, u8 *hash);
 
 /* lpt_commit.c */
 int ubifs_lpt_start_commit(struct ubifs_info *c);
@@ -1807,7 +2050,7 @@ int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf);
 int ubifs_rcvry_gc_commit(struct ubifs_info *c);
 int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
                             int deletion, loff_t new_size);
-int ubifs_recover_size(struct ubifs_info *c);
+int ubifs_recover_size(struct ubifs_info *c, bool in_place);
 void ubifs_destroy_size_tree(struct ubifs_info *c);
 
 /* ioctl.c */
index 6fc5425b1474a52694b4860aa79cefc52d3fd826..2652d00842d6ba8c6479f816765c87dfc622d1cb 100644 (file)
@@ -243,7 +243,7 @@ xfs_attr3_leaf_verify(
        struct xfs_mount                *mp = bp->b_target->bt_mount;
        struct xfs_attr_leafblock       *leaf = bp->b_addr;
        struct xfs_attr_leaf_entry      *entries;
-       uint16_t                        end;
+       uint32_t                        end;    /* must be 32bit - see below */
        int                             i;
 
        xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
@@ -293,6 +293,11 @@ xfs_attr3_leaf_verify(
        /*
         * Quickly check the freemap information.  Attribute data has to be
         * aligned to 4-byte boundaries, and likewise for the free space.
+        *
+        * Note that for 64k block size filesystems, the freemap entries cannot
+        * overflow as they are only be16 fields. However, when checking end
+        * pointer of the freemap, we have to be careful to detect overflows and
+        * so use uint32_t for those checks.
         */
        for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
                if (ichdr.freemap[i].base > mp->m_attr_geo->blksize)
@@ -303,7 +308,9 @@ xfs_attr3_leaf_verify(
                        return __this_address;
                if (ichdr.freemap[i].size & 0x3)
                        return __this_address;
-               end = ichdr.freemap[i].base + ichdr.freemap[i].size;
+
+               /* be care of 16 bit overflows here */
+               end = (uint32_t)ichdr.freemap[i].base + ichdr.freemap[i].size;
                if (end < ichdr.freemap[i].base)
                        return __this_address;
                if (end > mp->m_attr_geo->blksize)
index 61a5ad2600e865a6b11a8345f956eba10203abd2..53c9ab8fb777f4d78803da906b151624cfc7053f 100644 (file)
@@ -919,28 +919,67 @@ out_unlock:
        return error;
 }
 
-STATIC int
-xfs_file_clone_range(
-       struct file     *file_in,
-       loff_t          pos_in,
-       struct file     *file_out,
-       loff_t          pos_out,
-       u64             len)
-{
-       return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-                                    len, false);
-}
 
-STATIC int
-xfs_file_dedupe_range(
-       struct file     *file_in,
-       loff_t          pos_in,
-       struct file     *file_out,
-       loff_t          pos_out,
-       u64             len)
+loff_t
+xfs_file_remap_range(
+       struct file             *file_in,
+       loff_t                  pos_in,
+       struct file             *file_out,
+       loff_t                  pos_out,
+       loff_t                  len,
+       unsigned int            remap_flags)
 {
-       return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-                                    len, true);
+       struct inode            *inode_in = file_inode(file_in);
+       struct xfs_inode        *src = XFS_I(inode_in);
+       struct inode            *inode_out = file_inode(file_out);
+       struct xfs_inode        *dest = XFS_I(inode_out);
+       struct xfs_mount        *mp = src->i_mount;
+       loff_t                  remapped = 0;
+       xfs_extlen_t            cowextsize;
+       int                     ret;
+
+       if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+               return -EINVAL;
+
+       if (!xfs_sb_version_hasreflink(&mp->m_sb))
+               return -EOPNOTSUPP;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
+       /* Prepare and then clone file data. */
+       ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
+                       &len, remap_flags);
+       if (ret < 0 || len == 0)
+               return ret;
+
+       trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
+
+       ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len,
+                       &remapped);
+       if (ret)
+               goto out_unlock;
+
+       /*
+        * Carry the cowextsize hint from src to dest if we're sharing the
+        * entire source file to the entire destination file, the source file
+        * has a cowextsize hint, and the destination file does not.
+        */
+       cowextsize = 0;
+       if (pos_in == 0 && len == i_size_read(inode_in) &&
+           (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
+           pos_out == 0 && len >= i_size_read(inode_out) &&
+           !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
+               cowextsize = src->i_d.di_cowextsize;
+
+       ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
+                       remap_flags);
+
+out_unlock:
+       xfs_reflink_remap_unlock(file_in, file_out);
+       if (ret)
+               trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
+       return remapped > 0 ? remapped : ret;
 }
 
 STATIC int
@@ -1175,8 +1214,7 @@ const struct file_operations xfs_file_operations = {
        .fsync          = xfs_file_fsync,
        .get_unmapped_area = thp_get_unmapped_area,
        .fallocate      = xfs_file_fallocate,
-       .clone_file_range = xfs_file_clone_range,
-       .dedupe_file_range = xfs_file_dedupe_range,
+       .remap_file_range = xfs_file_remap_range,
 };
 
 const struct file_operations xfs_dir_file_operations = {
index 6e2c08f30f602deb360e737003cc3ae1abf4bfc7..6ecdbb3af7de5c02c86a25d41ed7086ae2f845fc 100644 (file)
@@ -1608,7 +1608,7 @@ xfs_ioc_getbmap(
        error = 0;
 out_free_buf:
        kmem_free(buf);
-       return 0;
+       return error;
 }
 
 struct getfsmap_info {
index 576c375ce12a8f411a49f75cf6bd72f69c96279c..6b736ea58d35402eb7e7975067a4303131cf3d83 100644 (file)
@@ -107,5 +107,5 @@ assfail(char *expr, char *file, int line)
 void
 xfs_hex_dump(void *p, int length)
 {
-       print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
+       print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1);
 }
index 8eaeec9d58ed6799898753f49f0ad895b5db4cb5..ecdb086bc23e559f6fcfc45ca70590533d04b3c7 100644 (file)
@@ -913,18 +913,18 @@ out_error:
 /*
  * Update destination inode size & cowextsize hint, if necessary.
  */
-STATIC int
+int
 xfs_reflink_update_dest(
        struct xfs_inode        *dest,
        xfs_off_t               newlen,
        xfs_extlen_t            cowextsize,
-       bool                    is_dedupe)
+       unsigned int            remap_flags)
 {
        struct xfs_mount        *mp = dest->i_mount;
        struct xfs_trans        *tp;
        int                     error;
 
-       if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
+       if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
                return 0;
 
        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
@@ -945,10 +945,6 @@ xfs_reflink_update_dest(
                dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
        }
 
-       if (!is_dedupe) {
-               xfs_trans_ichgtime(tp, dest,
-                                  XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-       }
        xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE);
 
        error = xfs_trans_commit(tp);
@@ -1112,19 +1108,28 @@ out:
 /*
  * Iteratively remap one file's extents (and holes) to another's.
  */
-STATIC int
+int
 xfs_reflink_remap_blocks(
        struct xfs_inode        *src,
-       xfs_fileoff_t           srcoff,
+       loff_t                  pos_in,
        struct xfs_inode        *dest,
-       xfs_fileoff_t           destoff,
-       xfs_filblks_t           len,
-       xfs_off_t               new_isize)
+       loff_t                  pos_out,
+       loff_t                  remap_len,
+       loff_t                  *remapped)
 {
        struct xfs_bmbt_irec    imap;
+       xfs_fileoff_t           srcoff;
+       xfs_fileoff_t           destoff;
+       xfs_filblks_t           len;
+       xfs_filblks_t           range_len;
+       xfs_filblks_t           remapped_len = 0;
+       xfs_off_t               new_isize = pos_out + remap_len;
        int                     nimaps;
        int                     error = 0;
-       xfs_filblks_t           range_len;
+
+       destoff = XFS_B_TO_FSBT(src->i_mount, pos_out);
+       srcoff = XFS_B_TO_FSBT(src->i_mount, pos_in);
+       len = XFS_B_TO_FSB(src->i_mount, remap_len);
 
        /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */
        while (len) {
@@ -1139,7 +1144,7 @@ xfs_reflink_remap_blocks(
                error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0);
                xfs_iunlock(src, lock_mode);
                if (error)
-                       goto err;
+                       break;
                ASSERT(nimaps == 1);
 
                trace_xfs_reflink_remap_imap(src, srcoff, len, XFS_IO_OVERWRITE,
@@ -1153,23 +1158,24 @@ xfs_reflink_remap_blocks(
                error = xfs_reflink_remap_extent(dest, &imap, destoff,
                                new_isize);
                if (error)
-                       goto err;
+                       break;
 
                if (fatal_signal_pending(current)) {
                        error = -EINTR;
-                       goto err;
+                       break;
                }
 
                /* Advance drange/srange */
                srcoff += range_len;
                destoff += range_len;
                len -= range_len;
+               remapped_len += range_len;
        }
 
-       return 0;
-
-err:
-       trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_);
+       if (error)
+               trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_);
+       *remapped = min_t(loff_t, remap_len,
+                         XFS_FSB_TO_B(src->i_mount, remapped_len));
        return error;
 }
 
@@ -1218,7 +1224,7 @@ retry:
 }
 
 /* Unlock both inodes after they've been prepped for a range clone. */
-STATIC void
+void
 xfs_reflink_remap_unlock(
        struct file             *file_in,
        struct file             *file_out)
@@ -1286,21 +1292,20 @@ xfs_reflink_zero_posteof(
  * stale data in the destination file. Hence we reject these clone attempts with
  * -EINVAL in this case.
  */
-STATIC int
+int
 xfs_reflink_remap_prep(
        struct file             *file_in,
        loff_t                  pos_in,
        struct file             *file_out,
        loff_t                  pos_out,
-       u64                     *len,
-       bool                    is_dedupe)
+       loff_t                  *len,
+       unsigned int            remap_flags)
 {
        struct inode            *inode_in = file_inode(file_in);
        struct xfs_inode        *src = XFS_I(inode_in);
        struct inode            *inode_out = file_inode(file_out);
        struct xfs_inode        *dest = XFS_I(inode_out);
        bool                    same_inode = (inode_in == inode_out);
-       u64                     blkmask = i_blocksize(inode_in) - 1;
        ssize_t                 ret;
 
        /* Lock both files against IO */
@@ -1323,29 +1328,11 @@ xfs_reflink_remap_prep(
        if (IS_DAX(inode_in) || IS_DAX(inode_out))
                goto out_unlock;
 
-       ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
-                       len, is_dedupe);
-       if (ret <= 0)
+       ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
+                       len, remap_flags);
+       if (ret < 0 || *len == 0)
                goto out_unlock;
 
-       /*
-        * If the dedupe data matches, chop off the partial EOF block
-        * from the source file so we don't try to dedupe the partial
-        * EOF block.
-        */
-       if (is_dedupe) {
-               *len &= ~blkmask;
-       } else if (*len & blkmask) {
-               /*
-                * The user is attempting to share a partial EOF block,
-                * if it's inside the destination EOF then reject it.
-                */
-               if (pos_out + *len < i_size_read(inode_out)) {
-                       ret = -EINVAL;
-                       goto out_unlock;
-               }
-       }
-
        /* Attach dquots to dest inode before changing block map */
        ret = xfs_qm_dqattach(dest);
        if (ret)
@@ -1365,31 +1352,9 @@ xfs_reflink_remap_prep(
                goto out_unlock;
 
        /* Zap any page cache for the destination file's range. */
-       truncate_inode_pages_range(&inode_out->i_data, pos_out,
-                                  PAGE_ALIGN(pos_out + *len) - 1);
-
-       /* If we're altering the file contents... */
-       if (!is_dedupe) {
-               /*
-                * ...update the timestamps (which will grab the ilock again
-                * from xfs_fs_dirty_inode, so we have to call it before we
-                * take the ilock).
-                */
-               if (!(file_out->f_mode & FMODE_NOCMTIME)) {
-                       ret = file_update_time(file_out);
-                       if (ret)
-                               goto out_unlock;
-               }
-
-               /*
-                * ...clear the security bits if the process is not being run
-                * by root.  This keeps people from modifying setuid and setgid
-                * binaries.
-                */
-               ret = file_remove_privs(file_out);
-               if (ret)
-                       goto out_unlock;
-       }
+       truncate_inode_pages_range(&inode_out->i_data,
+                       round_down(pos_out, PAGE_SIZE),
+                       round_up(pos_out + *len, PAGE_SIZE) - 1);
 
        return 1;
 out_unlock:
@@ -1397,72 +1362,6 @@ out_unlock:
        return ret;
 }
 
-/*
- * Link a range of blocks from one file to another.
- */
-int
-xfs_reflink_remap_range(
-       struct file             *file_in,
-       loff_t                  pos_in,
-       struct file             *file_out,
-       loff_t                  pos_out,
-       u64                     len,
-       bool                    is_dedupe)
-{
-       struct inode            *inode_in = file_inode(file_in);
-       struct xfs_inode        *src = XFS_I(inode_in);
-       struct inode            *inode_out = file_inode(file_out);
-       struct xfs_inode        *dest = XFS_I(inode_out);
-       struct xfs_mount        *mp = src->i_mount;
-       xfs_fileoff_t           sfsbno, dfsbno;
-       xfs_filblks_t           fsblen;
-       xfs_extlen_t            cowextsize;
-       ssize_t                 ret;
-
-       if (!xfs_sb_version_hasreflink(&mp->m_sb))
-               return -EOPNOTSUPP;
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -EIO;
-
-       /* Prepare and then clone file data. */
-       ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
-                       &len, is_dedupe);
-       if (ret <= 0)
-               return ret;
-
-       trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
-
-       dfsbno = XFS_B_TO_FSBT(mp, pos_out);
-       sfsbno = XFS_B_TO_FSBT(mp, pos_in);
-       fsblen = XFS_B_TO_FSB(mp, len);
-       ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
-                       pos_out + len);
-       if (ret)
-               goto out_unlock;
-
-       /*
-        * Carry the cowextsize hint from src to dest if we're sharing the
-        * entire source file to the entire destination file, the source file
-        * has a cowextsize hint, and the destination file does not.
-        */
-       cowextsize = 0;
-       if (pos_in == 0 && len == i_size_read(inode_in) &&
-           (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
-           pos_out == 0 && len >= i_size_read(inode_out) &&
-           !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
-               cowextsize = src->i_d.di_cowextsize;
-
-       ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
-                       is_dedupe);
-
-out_unlock:
-       xfs_reflink_remap_unlock(file_in, file_out);
-       if (ret)
-               trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
-       return ret;
-}
-
 /*
  * The user wants to preemptively CoW all shared blocks in this file,
  * which enables us to turn off the reflink flag.  Iterate all
index 7f47202b5639142054420b2fb2384e9c44cfbb0d..6d73daef1f132398d0b2ee02ed319c067bd02b6f 100644 (file)
@@ -27,13 +27,24 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
 extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
                xfs_off_t count);
 extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
-extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
-               struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe);
+extern loff_t xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
+               struct file *file_out, loff_t pos_out, loff_t len,
+               unsigned int remap_flags);
 extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp,
                struct xfs_inode *ip, bool *has_shared);
 extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
                struct xfs_trans **tpp);
 extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,
                xfs_off_t len);
+extern int xfs_reflink_remap_prep(struct file *file_in, loff_t pos_in,
+               struct file *file_out, loff_t pos_out, loff_t *len,
+               unsigned int remap_flags);
+extern int xfs_reflink_remap_blocks(struct xfs_inode *src, loff_t pos_in,
+               struct xfs_inode *dest, loff_t pos_out, loff_t remap_len,
+               loff_t *remapped);
+extern int xfs_reflink_update_dest(struct xfs_inode *dest, xfs_off_t newlen,
+               xfs_extlen_t cowextsize, unsigned int remap_flags);
+extern void xfs_reflink_remap_unlock(struct file *file_in,
+               struct file *file_out);
 
 #endif /* __XFS_REFLINK_H */
index 89f3b03b14451af9f4a9707172a97143f2ae9b5e..e3667c9a33a5deea5ef1f849b97cb0c3fe83a404 100644 (file)
@@ -3,7 +3,7 @@
 #define _4LEVEL_FIXUP_H
 
 #define __ARCH_HAS_4LEVEL_HACK
-#define __PAGETABLE_PUD_FOLDED
+#define __PAGETABLE_PUD_FOLDED 1
 
 #define PUD_SHIFT                      PGDIR_SHIFT
 #define PUD_SIZE                       PGDIR_SIZE
index 9c2e0708eb82f4aeb8c009f51f78fc43ae511036..73474bb52344d982abaee00ffcbad322308e06f4 100644 (file)
@@ -3,7 +3,7 @@
 #define _5LEVEL_FIXUP_H
 
 #define __ARCH_HAS_5LEVEL_HACK
-#define __PAGETABLE_P4D_FOLDED
+#define __PAGETABLE_P4D_FOLDED 1
 
 #define P4D_SHIFT                      PGDIR_SHIFT
 #define P4D_SIZE                       PGDIR_SIZE
index 0c34215263b8aec624451b3f04575a1ea6328cf7..1d6dd38c0e5ea8a2155c370cf27bb808f252031e 100644 (file)
@@ -5,7 +5,7 @@
 #ifndef __ASSEMBLY__
 #include <asm-generic/5level-fixup.h>
 
-#define __PAGETABLE_PUD_FOLDED
+#define __PAGETABLE_PUD_FOLDED 1
 
 /*
  * Having the pud type consist of a pgd gets the size right, and allows
index 1a29b2a0282bf20a8541b79096b474d16c5ae50c..04cb913797bc0d534032364c05d53c50d8d7d73f 100644 (file)
@@ -4,7 +4,7 @@
 
 #ifndef __ASSEMBLY__
 
-#define __PAGETABLE_P4D_FOLDED
+#define __PAGETABLE_P4D_FOLDED 1
 
 typedef struct { pgd_t pgd; } p4d_t;
 
index f35f6e8149e47dca34e7cded26574b0786322fc1..b85b8271a73debc1dc58f661ba9073c399fab175 100644 (file)
@@ -8,7 +8,7 @@
 
 struct mm_struct;
 
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 
 /*
  * Having the pmd type consist of a pud gets the size right, and allows
index e950b9c50f34f218284ff0785366c9ef07a6bdf9..9bef475db6fefe1e3b79c04cff754efd1b383de0 100644 (file)
@@ -9,7 +9,7 @@
 #else
 #include <asm-generic/pgtable-nop4d.h>
 
-#define __PAGETABLE_PUD_FOLDED
+#define __PAGETABLE_PUD_FOLDED 1
 
 /*
  * Having the pud type consist of a p4d gets the size right, and allows
index 5657a20e0c599449d9851e08f672b2c2cb7f6d64..359fb935ded6ab0cc418659f6f119d26886db129 100644 (file)
@@ -1127,4 +1127,20 @@ static inline bool arch_has_pfn_modify_check(void)
 #endif
 #endif
 
+/*
+ * On some architectures it depends on the mm if the p4d/pud or pmd
+ * layer of the page table hierarchy is folded or not.
+ */
+#ifndef mm_p4d_folded
+#define mm_p4d_folded(mm)      __is_defined(__PAGETABLE_P4D_FOLDED)
+#endif
+
+#ifndef mm_pud_folded
+#define mm_pud_folded(mm)      __is_defined(__PAGETABLE_PUD_FOLDED)
+#endif
+
+#ifndef mm_pmd_folded
+#define mm_pmd_folded(mm)      __is_defined(__PAGETABLE_PMD_FOLDED)
+#endif
+
 #endif /* _ASM_GENERIC_PGTABLE_H */
diff --git a/include/crypto/asym_tpm_subtype.h b/include/crypto/asym_tpm_subtype.h
new file mode 100644 (file)
index 0000000..48198c3
--- /dev/null
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _LINUX_ASYM_TPM_SUBTYPE_H
+#define _LINUX_ASYM_TPM_SUBTYPE_H
+
+#include <linux/keyctl.h>
+
+struct tpm_key {
+       void *blob;
+       u32 blob_len;
+       uint16_t key_len; /* Size in bits of the key */
+       const void *pub_key; /* pointer inside blob to the public key bytes */
+       uint16_t pub_key_len; /* length of the public key */
+};
+
+struct tpm_key *tpm_key_create(const void *blob, uint32_t blob_len);
+
+extern struct asymmetric_key_subtype asym_tpm_subtype;
+
+#endif /* _LINUX_ASYM_TPM_SUBTYPE_H */
index e0b681a717bac93ce3323b1a353fceba8310f811..be626eac911338cc12c5783de8d7181d8cb47949 100644 (file)
@@ -14,6 +14,8 @@
 #ifndef _LINUX_PUBLIC_KEY_H
 #define _LINUX_PUBLIC_KEY_H
 
+#include <linux/keyctl.h>
+
 /*
  * Cryptographic data for the public-key subtype of the asymmetric key type.
  *
@@ -23,6 +25,7 @@
 struct public_key {
        void *key;
        u32 keylen;
+       bool key_is_private;
        const char *id_type;
        const char *pkey_algo;
 };
@@ -40,6 +43,7 @@ struct public_key_signature {
        u8 digest_size;         /* Number of bytes in digest */
        const char *pkey_algo;
        const char *hash_algo;
+       const char *encoding;
 };
 
 extern void public_key_signature_free(struct public_key_signature *sig);
@@ -65,8 +69,14 @@ extern int restrict_link_by_key_or_keyring_chain(struct key *trust_keyring,
                                                 const union key_payload *payload,
                                                 struct key *trusted);
 
-extern int verify_signature(const struct key *key,
-                           const struct public_key_signature *sig);
+extern int query_asymmetric_key(const struct kernel_pkey_params *,
+                               struct kernel_pkey_query *);
+
+extern int encrypt_blob(struct kernel_pkey_params *, const void *, void *);
+extern int decrypt_blob(struct kernel_pkey_params *, const void *, void *);
+extern int create_signature(struct kernel_pkey_params *, const void *, void *);
+extern int verify_signature(const struct key *,
+                           const struct public_key_signature *);
 
 int public_key_verify_signature(const struct public_key *pkey,
                                const struct public_key_signature *sig);
index 91a877fa00cb59161125a711515931e49b040e53..9ccad6b062f2bb62c54434288e7fc9a25b194b53 100644 (file)
@@ -82,6 +82,53 @@ enum drm_connector_status {
        connector_status_unknown = 3,
 };
 
+/**
+ * enum drm_connector_registration_state - userspace registration status for
+ * a &drm_connector
+ *
+ * This enum is used to track the status of initializing a connector and
+ * registering it with userspace, so that DRM can prevent bogus modesets on
+ * connectors that no longer exist.
+ */
+enum drm_connector_registration_state {
+       /**
+        * @DRM_CONNECTOR_INITIALIZING: The connector has just been created,
+        * but has yet to be exposed to userspace. There should be no
+        * additional restrictions to how the state of this connector may be
+        * modified.
+        */
+       DRM_CONNECTOR_INITIALIZING = 0,
+
+       /**
+        * @DRM_CONNECTOR_REGISTERED: The connector has been fully initialized
+        * and registered with sysfs, as such it has been exposed to
+        * userspace. There should be no additional restrictions to how the
+        * state of this connector may be modified.
+        */
+       DRM_CONNECTOR_REGISTERED = 1,
+
+       /**
+        * @DRM_CONNECTOR_UNREGISTERED: The connector has either been exposed
+        * to userspace and has since been unregistered and removed from
+        * userspace, or the connector was unregistered before it had a chance
+        * to be exposed to userspace (e.g. still in the
+        * @DRM_CONNECTOR_INITIALIZING state). When a connector is
+        * unregistered, there are additional restrictions to how its state
+        * may be modified:
+        *
+        * - An unregistered connector may only have its DPMS changed from
+        *   On->Off. Once DPMS is changed to Off, it may not be switched back
+        *   to On.
+        * - Modesets are not allowed on unregistered connectors, unless they
+        *   would result in disabling its assigned CRTCs. This means
+        *   disabling a CRTC on an unregistered connector is OK, but enabling
+        *   one is not.
+        * - Removing a CRTC from an unregistered connector is OK, but new
+        *   CRTCs may never be assigned to an unregistered connector.
+        */
+       DRM_CONNECTOR_UNREGISTERED = 2,
+};
+
 enum subpixel_order {
        SubPixelUnknown = 0,
        SubPixelHorizontalRGB,
@@ -853,10 +900,12 @@ struct drm_connector {
        bool ycbcr_420_allowed;
 
        /**
-        * @registered: Is this connector exposed (registered) with userspace?
+        * @registration_state: Is this connector initializing, exposed
+        * (registered) with userspace, or unregistered?
+        *
         * Protected by @mutex.
         */
-       bool registered;
+       enum drm_connector_registration_state registration_state;
 
        /**
         * @modes:
@@ -1166,6 +1215,24 @@ static inline void drm_connector_unreference(struct drm_connector *connector)
        drm_connector_put(connector);
 }
 
+/**
+ * drm_connector_is_unregistered - has the connector been unregistered from
+ * userspace?
+ * @connector: DRM connector
+ *
+ * Checks whether or not @connector has been unregistered from userspace.
+ *
+ * Returns:
+ * True if the connector was unregistered, false if the connector is
+ * registered or has not yet been registered with userspace.
+ */
+static inline bool
+drm_connector_is_unregistered(struct drm_connector *connector)
+{
+       return READ_ONCE(connector->registration_state) ==
+               DRM_CONNECTOR_UNREGISTERED;
+}
+
 const char *drm_get_connector_status_name(enum drm_connector_status status);
 const char *drm_get_subpixel_order_name(enum subpixel_order order);
 const char *drm_get_dpms_name(int val);
index e0a9c236887281d793acff2ea457d2290c56cb4e..9ce2f0fae57e39da7dd7471248402a8b1f2a2a35 100644 (file)
@@ -17,6 +17,8 @@
 #include <linux/seq_file.h>
 #include <keys/asymmetric-type.h>
 
+struct kernel_pkey_query;
+struct kernel_pkey_params;
 struct public_key_signature;
 
 /*
@@ -34,6 +36,13 @@ struct asymmetric_key_subtype {
        /* Destroy a key of this subtype */
        void (*destroy)(void *payload_crypto, void *payload_auth);
 
+       int (*query)(const struct kernel_pkey_params *params,
+                    struct kernel_pkey_query *info);
+
+       /* Encrypt/decrypt/sign data */
+       int (*eds_op)(struct kernel_pkey_params *params,
+                     const void *in, void *out);
+
        /* Verify the signature on a key of this subtype (optional) */
        int (*verify_signature)(const struct key *key,
                                const struct public_key_signature *sig);
diff --git a/include/keys/trusted.h b/include/keys/trusted.h
new file mode 100644 (file)
index 0000000..adbcb68
--- /dev/null
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TRUSTED_KEY_H
+#define __TRUSTED_KEY_H
+
+/* implementation specific TPM constants */
+#define MAX_BUF_SIZE                   1024
+#define TPM_GETRANDOM_SIZE             14
+#define TPM_OSAP_SIZE                  36
+#define TPM_OIAP_SIZE                  10
+#define TPM_SEAL_SIZE                  87
+#define TPM_UNSEAL_SIZE                        104
+#define TPM_SIZE_OFFSET                        2
+#define TPM_RETURN_OFFSET              6
+#define TPM_DATA_OFFSET                        10
+
+#define LOAD32(buffer, offset) (ntohl(*(uint32_t *)&buffer[offset]))
+#define LOAD32N(buffer, offset)        (*(uint32_t *)&buffer[offset])
+#define LOAD16(buffer, offset) (ntohs(*(uint16_t *)&buffer[offset]))
+
+struct tpm_buf {
+       int len;
+       unsigned char data[MAX_BUF_SIZE];
+};
+
+#define INIT_BUF(tb) (tb->len = 0)
+
+struct osapsess {
+       uint32_t handle;
+       unsigned char secret[SHA1_DIGEST_SIZE];
+       unsigned char enonce[TPM_NONCE_SIZE];
+};
+
+/* discrete values, but have to store in uint16_t for TPM use */
+enum {
+       SEAL_keytype = 1,
+       SRK_keytype = 4
+};
+
+int TSS_authhmac(unsigned char *digest, const unsigned char *key,
+                       unsigned int keylen, unsigned char *h1,
+                       unsigned char *h2, unsigned char h3, ...);
+int TSS_checkhmac1(unsigned char *buffer,
+                         const uint32_t command,
+                         const unsigned char *ononce,
+                         const unsigned char *key,
+                         unsigned int keylen, ...);
+
+int trusted_tpm_send(unsigned char *cmd, size_t buflen);
+int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce);
+
+#define TPM_DEBUG 0
+
+#if TPM_DEBUG
+static inline void dump_options(struct trusted_key_options *o)
+{
+       pr_info("trusted_key: sealing key type %d\n", o->keytype);
+       pr_info("trusted_key: sealing key handle %0X\n", o->keyhandle);
+       pr_info("trusted_key: pcrlock %d\n", o->pcrlock);
+       pr_info("trusted_key: pcrinfo %d\n", o->pcrinfo_len);
+       print_hex_dump(KERN_INFO, "pcrinfo ", DUMP_PREFIX_NONE,
+                      16, 1, o->pcrinfo, o->pcrinfo_len, 0);
+}
+
+static inline void dump_payload(struct trusted_key_payload *p)
+{
+       pr_info("trusted_key: key_len %d\n", p->key_len);
+       print_hex_dump(KERN_INFO, "key ", DUMP_PREFIX_NONE,
+                      16, 1, p->key, p->key_len, 0);
+       pr_info("trusted_key: bloblen %d\n", p->blob_len);
+       print_hex_dump(KERN_INFO, "blob ", DUMP_PREFIX_NONE,
+                      16, 1, p->blob, p->blob_len, 0);
+       pr_info("trusted_key: migratable %d\n", p->migratable);
+}
+
+static inline void dump_sess(struct osapsess *s)
+{
+       print_hex_dump(KERN_INFO, "trusted-key: handle ", DUMP_PREFIX_NONE,
+                      16, 1, &s->handle, 4, 0);
+       pr_info("trusted-key: secret:\n");
+       print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE,
+                      16, 1, &s->secret, SHA1_DIGEST_SIZE, 0);
+       pr_info("trusted-key: enonce:\n");
+       print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE,
+                      16, 1, &s->enonce, SHA1_DIGEST_SIZE, 0);
+}
+
+static inline void dump_tpm_buf(unsigned char *buf)
+{
+       int len;
+
+       pr_info("\ntrusted-key: tpm buffer\n");
+       len = LOAD32(buf, TPM_SIZE_OFFSET);
+       print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, buf, len, 0);
+}
+#else
+static inline void dump_options(struct trusted_key_options *o)
+{
+}
+
+static inline void dump_payload(struct trusted_key_payload *p)
+{
+}
+
+static inline void dump_sess(struct osapsess *s)
+{
+}
+
+static inline void dump_tpm_buf(unsigned char *buf)
+{
+}
+#endif
+
+static inline void store8(struct tpm_buf *buf, const unsigned char value)
+{
+       buf->data[buf->len++] = value;
+}
+
+static inline void store16(struct tpm_buf *buf, const uint16_t value)
+{
+       *(uint16_t *) & buf->data[buf->len] = htons(value);
+       buf->len += sizeof value;
+}
+
+static inline void store32(struct tpm_buf *buf, const uint32_t value)
+{
+       *(uint32_t *) & buf->data[buf->len] = htonl(value);
+       buf->len += sizeof value;
+}
+
+static inline void storebytes(struct tpm_buf *buf, const unsigned char *in,
+                             const int len)
+{
+       memcpy(buf->data + buf->len, in, len);
+       buf->len += len;
+}
+#endif
index 2a629acb4c3f467221c8db81ded322780f31b99a..2d29f55923e3ad2e79c70cd0832a8c8523f96ef8 100644 (file)
@@ -7,7 +7,12 @@
 #ifndef _LINUX_ADXL_H
 #define _LINUX_ADXL_H
 
+#ifdef CONFIG_ACPI_ADXL
 const char * const *adxl_get_component_names(void);
 int adxl_decode(u64 addr, u64 component_values[]);
+#else
+static inline const char * const *adxl_get_component_names(void)  { return NULL; }
+static inline int adxl_decode(u64 addr, u64 component_values[])   { return  -EOPNOTSUPP; }
+#endif
 
 #endif /* _LINUX_ADXL_H */
index 2c9756bd9c4cdc4b07ac9e8f6158480b0e58d88f..b2488055fd1d18a5e2986fe9fb27ec4b4bf572df 100644 (file)
 /* Error Codes */
 enum virtchnl_status_code {
        VIRTCHNL_STATUS_SUCCESS                         = 0,
-       VIRTCHNL_ERR_PARAM                              = -5,
+       VIRTCHNL_STATUS_ERR_PARAM                       = -5,
+       VIRTCHNL_STATUS_ERR_NO_MEMORY                   = -18,
        VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH             = -38,
        VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR             = -39,
        VIRTCHNL_STATUS_ERR_INVALID_VF_ID               = -40,
-       VIRTCHNL_STATUS_NOT_SUPPORTED                   = -64,
+       VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR           = -53,
+       VIRTCHNL_STATUS_ERR_NOT_SUPPORTED               = -64,
 };
 
+/* Backward compatibility */
+#define VIRTCHNL_ERR_PARAM VIRTCHNL_STATUS_ERR_PARAM
+#define VIRTCHNL_STATUS_NOT_SUPPORTED VIRTCHNL_STATUS_ERR_NOT_SUPPORTED
+
 #define VIRTCHNL_LINK_SPEED_100MB_SHIFT                0x1
 #define VIRTCHNL_LINK_SPEED_1000MB_SHIFT       0x2
 #define VIRTCHNL_LINK_SPEED_10GB_SHIFT         0x3
@@ -831,7 +837,7 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
        case VIRTCHNL_OP_EVENT:
        case VIRTCHNL_OP_UNKNOWN:
        default:
-               return VIRTCHNL_ERR_PARAM;
+               return VIRTCHNL_STATUS_ERR_PARAM;
        }
        /* few more checks */
        if (err_msg_format || valid_len != msglen)
index b47c7f716731fc5ebcdf28f03db3b5eadada59e0..056fb627edb3e82779d0d4f92b27be43a423ce15 100644 (file)
@@ -503,31 +503,23 @@ do {                                              \
        disk_devt((bio)->bi_disk)
 
 #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
-int bio_associate_blkg_from_page(struct bio *bio, struct page *page);
+int bio_associate_blkcg_from_page(struct bio *bio, struct page *page);
 #else
-static inline int bio_associate_blkg_from_page(struct bio *bio,
-                                              struct page *page) { return 0; }
+static inline int bio_associate_blkcg_from_page(struct bio *bio,
+                                               struct page *page) {  return 0; }
 #endif
 
 #ifdef CONFIG_BLK_CGROUP
+int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
 int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
-int bio_associate_blkg_from_css(struct bio *bio,
-                               struct cgroup_subsys_state *css);
-int bio_associate_create_blkg(struct request_queue *q, struct bio *bio);
-int bio_reassociate_blkg(struct request_queue *q, struct bio *bio);
 void bio_disassociate_task(struct bio *bio);
-void bio_clone_blkg_association(struct bio *dst, struct bio *src);
+void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
 #else  /* CONFIG_BLK_CGROUP */
-static inline int bio_associate_blkg_from_css(struct bio *bio,
-                                             struct cgroup_subsys_state *css)
-{ return 0; }
-static inline int bio_associate_create_blkg(struct request_queue *q,
-                                           struct bio *bio) { return 0; }
-static inline int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
-{ return 0; }
+static inline int bio_associate_blkcg(struct bio *bio,
+                       struct cgroup_subsys_state *blkcg_css) { return 0; }
 static inline void bio_disassociate_task(struct bio *bio) { }
-static inline void bio_clone_blkg_association(struct bio *dst,
-                                             struct bio *src) { }
+static inline void bio_clone_blkcg_association(struct bio *dst,
+                       struct bio *src) { }
 #endif /* CONFIG_BLK_CGROUP */
 
 #ifdef CONFIG_HIGHMEM
index 1e76ceebeb5dc58c7f98e9f1d18d65fbe62477ef..6d766a19f2bbb2b62facc79ff3871aa81be68534 100644 (file)
@@ -126,7 +126,7 @@ struct blkcg_gq {
        struct request_list             rl;
 
        /* reference count */
-       struct percpu_ref               refcnt;
+       atomic_t                        refcnt;
 
        /* is this blkg online? protected by both blkcg and q locks */
        bool                            online;
@@ -184,8 +184,6 @@ extern struct cgroup_subsys_state * const blkcg_root_css;
 
 struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
                                      struct request_queue *q, bool update_hint);
-struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
-                                     struct request_queue *q);
 struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
                                    struct request_queue *q);
 int blkcg_init_queue(struct request_queue *q);
@@ -232,59 +230,22 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
                   char *input, struct blkg_conf_ctx *ctx);
 void blkg_conf_finish(struct blkg_conf_ctx *ctx);
 
-/**
- * blkcg_css - find the current css
- *
- * Find the css associated with either the kthread or the current task.
- * This may return a dying css, so it is up to the caller to use tryget logic
- * to confirm it is alive and well.
- */
-static inline struct cgroup_subsys_state *blkcg_css(void)
-{
-       struct cgroup_subsys_state *css;
-
-       css = kthread_blkcg();
-       if (css)
-               return css;
-       return task_css(current, io_cgrp_id);
-}
 
 static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
 {
        return css ? container_of(css, struct blkcg, css) : NULL;
 }
 
-/**
- * __bio_blkcg - internal version of bio_blkcg for bfq and cfq
- *
- * DO NOT USE.
- * There is a flaw using this version of the function.  In particular, this was
- * used in a broken paradigm where association was called on the given css.  It
- * is possible though that the returned css from task_css() is in the process
- * of dying due to migration of the current task.  So it is improper to assume
- * *_get() is going to succeed.  Both BFQ and CFQ rely on this logic and will
- * take additional work to handle more gracefully.
- */
-static inline struct blkcg *__bio_blkcg(struct bio *bio)
-{
-       if (bio && bio->bi_blkg)
-               return bio->bi_blkg->blkcg;
-       return css_to_blkcg(blkcg_css());
-}
-
-/**
- * bio_blkcg - grab the blkcg associated with a bio
- * @bio: target bio
- *
- * This returns the blkcg associated with a bio, NULL if not associated.
- * Callers are expected to either handle NULL or know association has been
- * done prior to calling this.
- */
 static inline struct blkcg *bio_blkcg(struct bio *bio)
 {
-       if (bio && bio->bi_blkg)
-               return bio->bi_blkg->blkcg;
-       return NULL;
+       struct cgroup_subsys_state *css;
+
+       if (bio && bio->bi_css)
+               return css_to_blkcg(bio->bi_css);
+       css = kthread_blkcg();
+       if (css)
+               return css_to_blkcg(css);
+       return css_to_blkcg(task_css(current, io_cgrp_id));
 }
 
 static inline bool blk_cgroup_congested(void)
@@ -490,35 +451,26 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
  */
 static inline void blkg_get(struct blkcg_gq *blkg)
 {
-       percpu_ref_get(&blkg->refcnt);
+       WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
+       atomic_inc(&blkg->refcnt);
 }
 
 /**
- * blkg_tryget - try and get a blkg reference
+ * blkg_try_get - try and get a blkg reference
  * @blkg: blkg to get
  *
  * This is for use when doing an RCU lookup of the blkg.  We may be in the midst
  * of freeing this blkg, so we can only use it if the refcnt is not zero.
  */
-static inline bool blkg_tryget(struct blkcg_gq *blkg)
+static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
 {
-       return percpu_ref_tryget(&blkg->refcnt);
+       if (atomic_inc_not_zero(&blkg->refcnt))
+               return blkg;
+       return NULL;
 }
 
-/**
- * blkg_tryget_closest - try and get a blkg ref on the closet blkg
- * @blkg: blkg to get
- *
- * This walks up the blkg tree to find the closest non-dying blkg and returns
- * the blkg that it did association with as it may not be the passed in blkg.
- */
-static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
-{
-       while (!percpu_ref_tryget(&blkg->refcnt))
-               blkg = blkg->parent;
 
-       return blkg;
-}
+void __blkg_release_rcu(struct rcu_head *rcu);
 
 /**
  * blkg_put - put a blkg reference
@@ -526,7 +478,9 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
  */
 static inline void blkg_put(struct blkcg_gq *blkg)
 {
-       percpu_ref_put(&blkg->refcnt);
+       WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
+       if (atomic_dec_and_test(&blkg->refcnt))
+               call_rcu(&blkg->rcu_head, __blkg_release_rcu);
 }
 
 /**
@@ -579,36 +533,25 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
 
        rcu_read_lock();
 
-       if (bio && bio->bi_blkg) {
-               blkcg = bio->bi_blkg->blkcg;
-               if (blkcg == &blkcg_root)
-                       goto rl_use_root;
-
-               blkg_get(bio->bi_blkg);
-               rcu_read_unlock();
-               return &bio->bi_blkg->rl;
-       }
+       blkcg = bio_blkcg(bio);
 
-       blkcg = css_to_blkcg(blkcg_css());
+       /* bypass blkg lookup and use @q->root_rl directly for root */
        if (blkcg == &blkcg_root)
-               goto rl_use_root;
+               goto root_rl;
 
+       /*
+        * Try to use blkg->rl.  blkg lookup may fail under memory pressure
+        * or if either the blkcg or queue is going away.  Fall back to
+        * root_rl in such cases.
+        */
        blkg = blkg_lookup(blkcg, q);
        if (unlikely(!blkg))
-               blkg = __blkg_lookup_create(blkcg, q);
-
-       if (blkg->blkcg == &blkcg_root || !blkg_tryget(blkg))
-               goto rl_use_root;
+               goto root_rl;
 
+       blkg_get(blkg);
        rcu_read_unlock();
        return &blkg->rl;
-
-       /*
-        * Each blkg has its own request_list, however, the root blkcg
-        * uses the request_queue's root_rl.  This is to avoid most
-        * overhead for the root blkcg.
-        */
-rl_use_root:
+root_rl:
        rcu_read_unlock();
        return &q->root_rl;
 }
@@ -854,26 +797,32 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg
                                  struct bio *bio) { return false; }
 #endif
 
-
-static inline void blkcg_bio_issue_init(struct bio *bio)
-{
-       bio_issue_init(&bio->bi_issue, bio_sectors(bio));
-}
-
 static inline bool blkcg_bio_issue_check(struct request_queue *q,
                                         struct bio *bio)
 {
+       struct blkcg *blkcg;
        struct blkcg_gq *blkg;
        bool throtl = false;
 
        rcu_read_lock();
+       blkcg = bio_blkcg(bio);
+
+       /* associate blkcg if bio hasn't attached one */
+       bio_associate_blkcg(bio, &blkcg->css);
 
-       bio_associate_create_blkg(q, bio);
-       blkg = bio->bi_blkg;
+       blkg = blkg_lookup(blkcg, q);
+       if (unlikely(!blkg)) {
+               spin_lock_irq(q->queue_lock);
+               blkg = blkg_lookup_create(blkcg, q);
+               if (IS_ERR(blkg))
+                       blkg = NULL;
+               spin_unlock_irq(q->queue_lock);
+       }
 
        throtl = blk_throtl_bio(q, blkg, bio);
 
        if (!throtl) {
+               blkg = blkg ?: q->root_blkg;
                /*
                 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
                 * is a split bio and we would have already accounted for the
@@ -885,8 +834,6 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
                blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
        }
 
-       blkcg_bio_issue_init(bio);
-
        rcu_read_unlock();
        return !throtl;
 }
@@ -983,7 +930,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
 static inline void blkcg_deactivate_policy(struct request_queue *q,
                                           const struct blkcg_policy *pol) { }
 
-static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
 static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
 
 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
@@ -999,7 +945,6 @@ static inline void blk_put_rl(struct request_list *rl) { }
 static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
 static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
 
-static inline void blkcg_bio_issue_init(struct bio *bio) { }
 static inline bool blkcg_bio_issue_check(struct request_queue *q,
                                         struct bio *bio) { return true; }
 
index 093a818c5b684754dbe8877721389947d74674ac..1dcf652ba0aa3e989a7b93e78e4303aa920a1dab 100644 (file)
@@ -178,6 +178,7 @@ struct bio {
         * release.  Read comment on top of bio_associate_current().
         */
        struct io_context       *bi_ioc;
+       struct cgroup_subsys_state *bi_css;
        struct blkcg_gq         *bi_blkg;
        struct bio_issue        bi_issue;
 #endif
index 9e8056ec20faab8c3ffd4e99843a166ace95a0a1..d93e89761a8b429c2b5568688b7bf388e5b977d7 100644 (file)
@@ -51,6 +51,9 @@ struct bpf_reg_state {
                 *   PTR_TO_MAP_VALUE_OR_NULL
                 */
                struct bpf_map *map_ptr;
+
+               /* Max size from any of the above. */
+               unsigned long raw;
        };
        /* Fixed part of pointer offset, pointer types only */
        s32 off;
index 6b92b3395fa9954ec140df3ed5b2b6d7b4f527f1..65a38c4a02a18d59ff5837447264f7f00998e4fb 100644 (file)
@@ -213,12 +213,6 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
         CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING | \
         CEPH_FEATURE_CEPHX_V2)
 
-#define CEPH_FEATURES_REQUIRED_DEFAULT   \
-       (CEPH_FEATURE_NOSRCADDR |        \
-        CEPH_FEATURE_SUBSCRIBE2 |       \
-        CEPH_FEATURE_RECONNECT_SEQ |    \
-        CEPH_FEATURE_PGID64 |           \
-        CEPH_FEATURE_PGPOOL3 |          \
-        CEPH_FEATURE_OSDENC)
+#define CEPH_FEATURES_REQUIRED_DEFAULT 0
 
 #endif
index 9968332cceed0e64e5fc9bdb814507b0bf67451b..9d12757a65b01846486341c5d31d398ee51d89b4 100644 (file)
@@ -93,8 +93,6 @@ extern struct css_set init_css_set;
 
 bool css_has_online_children(struct cgroup_subsys_state *css);
 struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
-struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup,
-                                        struct cgroup_subsys *ss);
 struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
                                             struct cgroup_subsys *ss);
 struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
index 06e77473f17593dc1fac4ad01af652776735b6ef..88720b443cd646f588fa183d60a0f5c549fc030e 100644 (file)
@@ -1032,9 +1032,9 @@ int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz,
 #else /* !CONFIG_COMPAT */
 
 #define is_compat_task() (0)
-#ifndef in_compat_syscall
+/* Ensure no one redefines in_compat_syscall() under !CONFIG_COMPAT */
+#define in_compat_syscall in_compat_syscall
 static inline bool in_compat_syscall(void) { return false; }
-#endif
 
 #endif /* CONFIG_COMPAT */
 
index b1ce500fe8b3df06a8fca84d6f022b02813000f8..3e7dafb3ea8099285d4185df113f2c89a0426e06 100644 (file)
@@ -21,8 +21,6 @@
 #define __SANITIZE_ADDRESS__
 #endif
 
-#define __no_sanitize_address __attribute__((no_sanitize("address")))
-
 /*
  * Not all versions of clang implement the the type-generic versions
  * of the builtin overflow checkers. Fortunately, clang implements
@@ -41,6 +39,3 @@
  * compilers, like ICC.
  */
 #define barrier() __asm__ __volatile__("" : : : "memory")
-#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
-#define __assume_aligned(a, ...)       \
-       __attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
index 90ddfefb6c2b12886acba122a6d2e79e99951f82..2010493e1040846c999804e2e157233c27ccef60 100644 (file)
  */
 #define uninitialized_var(x) x = x
 
-#ifdef __CHECKER__
-#define __must_be_array(a)     0
-#else
-/* &a[0] degrades to a pointer: a different type from an array */
-#define __must_be_array(a)     BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
-#endif
-
 #ifdef RETPOLINE
-#define __noretpoline __attribute__((indirect_branch("keep")))
+#define __noretpoline __attribute__((__indirect_branch__("keep")))
 #endif
 
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
-#define __optimize(level)      __attribute__((__optimize__(level)))
-
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 
-#ifndef __CHECKER__
-#define __compiletime_warning(message) __attribute__((warning(message)))
-#define __compiletime_error(message) __attribute__((error(message)))
+#define __compiletime_warning(message) __attribute__((__warning__(message)))
+#define __compiletime_error(message) __attribute__((__error__(message)))
 
-#ifdef LATENT_ENTROPY_PLUGIN
+#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
 #define __latent_entropy __attribute__((latent_entropy))
 #endif
-#endif /* __CHECKER__ */
 
 /*
  * calling noreturn functions, __builtin_unreachable() and __builtin_trap()
  * Mark a position in code as unreachable.  This can be used to
  * suppress control flow warnings after asm blocks that transfer
  * control elsewhere.
- *
- * Early snapshots of gcc 4.5 don't support this and we can't detect
- * this in the preprocessor, but we can live with this because they're
- * unreleased.  Really, we need to have autoconf for the kernel.
  */
 #define unreachable() \
        do {                                    \
                __builtin_unreachable();        \
        } while (0)
 
-/* Mark a function definition as prohibited from being cloned. */
-#define __noclone      __attribute__((__noclone__, __optimize__("no-tracer")))
-
 #if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__)
 #define __randomize_layout __attribute__((randomize_layout))
 #define __no_randomize_layout __attribute__((no_randomize_layout))
 #define randomized_struct_fields_end   } __randomize_layout;
 #endif
 
-/*
- * When used with Link Time Optimization, gcc can optimize away C functions or
- * variables which are referenced only from assembly code.  __visible tells the
- * optimizer that something else uses this function or variable, thus preventing
- * this.
- */
-#define __visible      __attribute__((externally_visible))
-
-/* gcc version specific checks */
-
-#if GCC_VERSION >= 40900 && !defined(__CHECKER__)
-/*
- * __assume_aligned(n, k): Tell the optimizer that the returned
- * pointer can be assumed to be k modulo n. The second argument is
- * optional (default 0), so we use a variadic macro to make the
- * shorthand.
- *
- * Beware: Do not apply this to functions which may return
- * ERR_PTRs. Also, it is probably unwise to apply it to functions
- * returning extra information in the low bits (but in that case the
- * compiler should see some alignment anyway, when the return value is
- * massaged by 'flags = ptr & 3; ptr &= ~3;').
- */
-#define __assume_aligned(a, ...) __attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
-#endif
-
 /*
  * GCC 'asm goto' miscompiles certain code sequences:
  *
 #define KASAN_ABI_VERSION 3
 #endif
 
-#if GCC_VERSION >= 40902
-/*
- * Tell the compiler that address safety instrumentation (KASAN)
- * should not be applied to that function.
- * Conflicts with inlining: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
- */
-#define __no_sanitize_address __attribute__((no_sanitize_address))
-#ifdef CONFIG_KASAN
-#define __no_sanitize_address_or_inline                                        \
-       __no_sanitize_address __maybe_unused notrace
-#else
-#define __no_sanitize_address_or_inline inline
-#endif
-#endif
-
 #if GCC_VERSION >= 50100
-/*
- * Mark structures as requiring designated initializers.
- * https://gcc.gnu.org/onlinedocs/gcc/Designated-Inits.html
- */
-#define __designated_init __attribute__((designated_init))
 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
 #endif
 
-#if !defined(__noclone)
-#define __noclone      /* not needed */
-#endif
-
-#if !defined(__no_sanitize_address)
-#define __no_sanitize_address
-#define __no_sanitize_address_or_inline inline
-#endif
-
 /*
  * Turn individual warnings and errors on and off locally, depending
  * on version.
index 4c7f9befa9f6c66694c8146aafe4139614733d18..517bd14e122248f029d27e6dd192b492fe3be93b 100644 (file)
  */
 #define OPTIMIZER_HIDE_VAR(var) barrier()
 
-/* Intel ECC compiler doesn't support __builtin_types_compatible_p() */
-#define __must_be_array(a) 0
-
 #endif
 
 /* icc has this, but it's called _bswap16 */
 #define __HAVE_BUILTIN_BSWAP16__
 #define __builtin_bswap16 _bswap16
-
-/* The following are for compatibility with GCC, from compiler-gcc.h,
- * and may be redefined here because they should not be shared with other
- * compilers, like clang.
- */
-#define __visible      __attribute__((externally_visible))
index 4170fcee5adb30780dae6c06a5b34bcd57855d02..06396c1cf127f75bb357326883f1dcb69161ccf1 100644 (file)
@@ -23,8 +23,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 #define __branch_check__(x, expect, is_constant) ({                    \
                        long ______r;                                   \
                        static struct ftrace_likely_data                \
-                               __attribute__((__aligned__(4)))         \
-                               __attribute__((section("_ftrace_annotated_branch"))) \
+                               __aligned(4)                            \
+                               __section("_ftrace_annotated_branch")   \
                                ______f = {                             \
                                .data.func = __func__,                  \
                                .data.file = __FILE__,                  \
@@ -59,8 +59,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
        ({                                                              \
                int ______r;                                            \
                static struct ftrace_branch_data                        \
-                       __attribute__((__aligned__(4)))                 \
-                       __attribute__((section("_ftrace_branch")))      \
+                       __aligned(4)                                    \
+                       __section("_ftrace_branch")                     \
                        ______f = {                                     \
                                .func = __func__,                       \
                                .file = __FILE__,                       \
@@ -115,7 +115,10 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 # define ASM_UNREACHABLE
 #endif
 #ifndef unreachable
-# define unreachable() do { annotate_reachable(); do { } while (1); } while (0)
+# define unreachable() do {            \
+       annotate_unreachable();         \
+       __builtin_unreachable();        \
+} while (0)
 #endif
 
 /*
@@ -137,7 +140,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
        extern typeof(sym) sym;                                 \
        static const unsigned long __kentry_##sym               \
        __used                                                  \
-       __attribute__((section("___kentry" "+" #sym ), used))   \
+       __section("___kentry" "+" #sym )                        \
        = (unsigned long)&sym;
 #endif
 
@@ -186,7 +189,7 @@ void __read_once_size(const volatile void *p, void *res, int size)
  *     https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
  * '__maybe_unused' allows us to avoid defined-but-not-used warnings.
  */
-# define __no_kasan_or_inline __no_sanitize_address __maybe_unused
+# define __no_kasan_or_inline __no_sanitize_address notrace __maybe_unused
 #else
 # define __no_kasan_or_inline __always_inline
 #endif
@@ -278,7 +281,7 @@ unsigned long read_word_at_a_time(const void *addr)
  * visible to the compiler.
  */
 #define __ADDRESSABLE(sym) \
-       static void * __attribute__((section(".discard.addressable"), used)) \
+       static void * __section(".discard.addressable") __used \
                __PASTE(__addressable_##sym, __LINE__) = (void *)&sym;
 
 /**
@@ -331,10 +334,6 @@ static inline void *offset_to_ptr(const int *off)
 #endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 
-#ifndef __optimize
-# define __optimize(level)
-#endif
-
 /* Compile time object size, -1 for unknown */
 #ifndef __compiletime_object_size
 # define __compiletime_object_size(obj) -1
@@ -376,4 +375,7 @@ static inline void *offset_to_ptr(const int *off)
        compiletime_assert(__native_word(t),                            \
                "Need native word sized stores/loads for atomicity.")
 
+/* &a[0] degrades to a pointer: a different type from an array */
+#define __must_be_array(a)     BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
+
 #endif /* __LINUX_COMPILER_H */
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
new file mode 100644 (file)
index 0000000..f8c400b
--- /dev/null
@@ -0,0 +1,262 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_COMPILER_ATTRIBUTES_H
+#define __LINUX_COMPILER_ATTRIBUTES_H
+
+/*
+ * The attributes in this file are unconditionally defined and they directly
+ * map to compiler attribute(s), unless one of the compilers does not support
+ * the attribute. In that case, __has_attribute is used to check for support
+ * and the reason is stated in its comment ("Optional: ...").
+ *
+ * Any other "attributes" (i.e. those that depend on a configuration option,
+ * on a compiler, on an architecture, on plugins, on other attributes...)
+ * should be defined elsewhere (e.g. compiler_types.h or compiler-*.h).
+ * The intention is to keep this file as simple as possible, as well as
+ * compiler- and version-agnostic (e.g. avoiding GCC_VERSION checks).
+ *
+ * This file is meant to be sorted (by actual attribute name,
+ * not by #define identifier). Use the __attribute__((__name__)) syntax
+ * (i.e. with underscores) to avoid future collisions with other macros.
+ * Provide links to the documentation of each supported compiler, if it exists.
+ */
+
+/*
+ * __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17.
+ * In the meantime, to support 4.6 <= gcc < 5, we implement __has_attribute
+ * by hand.
+ *
+ * sparse does not support __has_attribute (yet) and defines __GNUC_MINOR__
+ * depending on the compiler used to build it; however, these attributes have
+ * no semantic effects for sparse, so it does not matter. Also note that,
+ * in order to avoid sparse's warnings, even the unsupported ones must be
+ * defined to 0.
+ */
+#ifndef __has_attribute
+# define __has_attribute(x) __GCC4_has_attribute_##x
+# define __GCC4_has_attribute___assume_aligned__      (__GNUC_MINOR__ >= 9)
+# define __GCC4_has_attribute___designated_init__     0
+# define __GCC4_has_attribute___externally_visible__  1
+# define __GCC4_has_attribute___noclone__             1
+# define __GCC4_has_attribute___optimize__            1
+# define __GCC4_has_attribute___nonstring__           0
+# define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8)
+#endif
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alias-function-attribute
+ */
+#define __alias(symbol)                 __attribute__((__alias__(#symbol)))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-aligned-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-aligned-type-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-aligned-variable-attribute
+ */
+#define __aligned(x)                    __attribute__((__aligned__(x)))
+#define __aligned_largest               __attribute__((__aligned__))
+
+/*
+ * Note: users of __always_inline currently do not write "inline" themselves,
+ * which seems to be required by gcc to apply the attribute according
+ * to its docs (and also "warning: always_inline function might not be
+ * inlinable [-Wattributes]" is emitted).
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-always_005finline-function-attribute
+ * clang: mentioned
+ */
+#define __always_inline                 inline __attribute__((__always_inline__))
+
+/*
+ * The second argument is optional (default 0), so we use a variadic macro
+ * to make the shorthand.
+ *
+ * Beware: Do not apply this to functions which may return
+ * ERR_PTRs. Also, it is probably unwise to apply it to functions
+ * returning extra information in the low bits (but in that case the
+ * compiler should see some alignment anyway, when the return value is
+ * massaged by 'flags = ptr & 3; ptr &= ~3;').
+ *
+ * Optional: only supported since gcc >= 4.9
+ * Optional: not supported by icc
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-assume_005faligned-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#assume-aligned
+ */
+#if __has_attribute(__assume_aligned__)
+# define __assume_aligned(a, ...)       __attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
+#else
+# define __assume_aligned(a, ...)
+#endif
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-cold-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute
+ */
+#define __cold                          __attribute__((__cold__))
+
+/*
+ * Note the long name.
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-const-function-attribute
+ */
+#define __attribute_const__             __attribute__((__const__))
+
+/*
+ * Don't. Just don't. See commit 771c035372a0 ("deprecate the '__deprecated'
+ * attribute warnings entirely and for good") for more information.
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-deprecated-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-deprecated-type-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-deprecated-variable-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Enumerator-Attributes.html#index-deprecated-enumerator-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#deprecated
+ */
+#define __deprecated
+
+/*
+ * Optional: only supported since gcc >= 5.1
+ * Optional: not supported by clang
+ * Optional: not supported by icc
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-designated_005finit-type-attribute
+ */
+#if __has_attribute(__designated_init__)
+# define __designated_init              __attribute__((__designated_init__))
+#else
+# define __designated_init
+#endif
+
+/*
+ * Optional: not supported by clang
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-externally_005fvisible-function-attribute
+ */
+#if __has_attribute(__externally_visible__)
+# define __visible                      __attribute__((__externally_visible__))
+#else
+# define __visible
+#endif
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-format-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#format
+ */
+#define __printf(a, b)                  __attribute__((__format__(printf, a, b)))
+#define __scanf(a, b)                   __attribute__((__format__(scanf, a, b)))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-gnu_005finline-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#gnu-inline
+ */
+#define __gnu_inline                    __attribute__((__gnu_inline__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-malloc-function-attribute
+ */
+#define __malloc                        __attribute__((__malloc__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-mode-type-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-mode-variable-attribute
+ */
+#define __mode(x)                       __attribute__((__mode__(x)))
+
+/*
+ * Optional: not supported by clang
+ * Note: icc does not recognize gcc's no-tracer
+ *
+ *  gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noclone-function-attribute
+ *  gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-optimize-function-attribute
+ */
+#if __has_attribute(__noclone__)
+# if __has_attribute(__optimize__)
+#  define __noclone                     __attribute__((__noclone__, __optimize__("no-tracer")))
+# else
+#  define __noclone                     __attribute__((__noclone__))
+# endif
+#else
+# define __noclone
+#endif
+
+/*
+ * Note the missing underscores.
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noinline-function-attribute
+ * clang: mentioned
+ */
+#define   noinline                      __attribute__((__noinline__))
+
+/*
+ * Optional: only supported since gcc >= 8
+ * Optional: not supported by clang
+ * Optional: not supported by icc
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-nonstring-variable-attribute
+ */
+#if __has_attribute(__nonstring__)
+# define __nonstring                    __attribute__((__nonstring__))
+#else
+# define __nonstring
+#endif
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noreturn-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#noreturn
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#id1
+ */
+#define __noreturn                      __attribute__((__noreturn__))
+
+/*
+ * Optional: only supported since gcc >= 4.8
+ * Optional: not supported by icc
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-no_005fsanitize_005faddress-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#no-sanitize-address-no-address-safety-analysis
+ */
+#if __has_attribute(__no_sanitize_address__)
+# define __no_sanitize_address          __attribute__((__no_sanitize_address__))
+#else
+# define __no_sanitize_address
+#endif
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-packed-type-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-packed-variable-attribute
+ */
+#define __packed                        __attribute__((__packed__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-pure-function-attribute
+ */
+#define __pure                          __attribute__((__pure__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-section-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-section-variable-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate
+ */
+#define __section(S)                    __attribute__((__section__(#S)))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-unused-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-unused-type-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-unused-variable-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-unused-label-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#maybe-unused-unused
+ */
+#define __always_unused                 __attribute__((__unused__))
+#define __maybe_unused                  __attribute__((__unused__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-used-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-used-variable-attribute
+ */
+#define __used                          __attribute__((__used__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute
+ */
+#define __weak                          __attribute__((__weak__))
+
+#endif /* __LINUX_COMPILER_ATTRIBUTES_H */
index 97cfe29b3f0adf6da5537d68bc7d85ad9769dcd5..4a3f9c09c92d04583f9a0d4fa335a1fb9ce44ab8 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __LINUX_COMPILER_TYPES_H
 #define __LINUX_COMPILER_TYPES_H
 
@@ -54,6 +55,9 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 
 #ifdef __KERNEL__
 
+/* Attributes */
+#include <linux/compiler_attributes.h>
+
 /* Compiler specific macros. */
 #ifdef __clang__
 #include <linux/compiler-clang.h>
@@ -78,12 +82,6 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 #include <asm/compiler.h>
 #endif
 
-/*
- * Generic compiler-independent macros required for kernel
- * build go below this comment. Actual compiler/compiler version
- * specific implementations come from the above header files
- */
-
 struct ftrace_branch_data {
        const char *func;
        const char *file;
@@ -106,10 +104,6 @@ struct ftrace_likely_data {
        unsigned long                   constant;
 };
 
-/* Don't. Just don't. */
-#define __deprecated
-#define __deprecated_for_modules
-
 #endif /* __KERNEL__ */
 
 #endif /* __ASSEMBLY__ */
@@ -119,10 +113,6 @@ struct ftrace_likely_data {
  * compilers. We don't consider that to be an error, so set them to nothing.
  * For example, some of them are for compiler specific plugins.
  */
-#ifndef __designated_init
-# define __designated_init
-#endif
-
 #ifndef __latent_entropy
 # define __latent_entropy
 #endif
@@ -140,15 +130,8 @@ struct ftrace_likely_data {
 # define randomized_struct_fields_end
 #endif
 
-#ifndef __visible
-#define __visible
-#endif
-
-/*
- * Assume alignment of return value.
- */
-#ifndef __assume_aligned
-#define __assume_aligned(a, ...)
+#ifndef asm_volatile_goto
+#define asm_volatile_goto(x...) asm goto(x)
 #endif
 
 /* Are two types/vars the same type (ignoring qualifiers)? */
@@ -159,14 +142,6 @@ struct ftrace_likely_data {
        (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \
         sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
 
-#ifndef __attribute_const__
-#define __attribute_const__    __attribute__((__const__))
-#endif
-
-#ifndef __noclone
-#define __noclone
-#endif
-
 /* Helpers for emitting diagnostics in pragmas. */
 #ifndef __diag
 #define __diag(string)
@@ -186,43 +161,16 @@ struct ftrace_likely_data {
 #define __diag_error(compiler, version, option, comment) \
        __diag_ ## compiler(version, error, option)
 
-/*
- * From the GCC manual:
- *
- * Many functions have no effects except the return value and their
- * return value depends only on the parameters and/or global
- * variables.  Such a function can be subject to common subexpression
- * elimination and loop optimization just as an arithmetic operator
- * would be.
- * [...]
- */
-#define __pure                 __attribute__((pure))
-#define __aligned(x)           __attribute__((aligned(x)))
-#define __printf(a, b)         __attribute__((format(printf, a, b)))
-#define __scanf(a, b)          __attribute__((format(scanf, a, b)))
-#define __maybe_unused         __attribute__((unused))
-#define __always_unused                __attribute__((unused))
-#define __mode(x)              __attribute__((mode(x)))
-#define __malloc               __attribute__((__malloc__))
-#define __used                 __attribute__((__used__))
-#define __noreturn             __attribute__((noreturn))
-#define __packed               __attribute__((packed))
-#define __weak                 __attribute__((weak))
-#define __alias(symbol)                __attribute__((alias(#symbol)))
-#define __cold                 __attribute__((cold))
-#define __section(S)           __attribute__((__section__(#S)))
-
-
 #ifdef CONFIG_ENABLE_MUST_CHECK
-#define __must_check           __attribute__((warn_unused_result))
+#define __must_check           __attribute__((__warn_unused_result__))
 #else
 #define __must_check
 #endif
 
-#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
+#if defined(CC_USING_HOTPATCH)
 #define notrace                        __attribute__((hotpatch(0, 0)))
 #else
-#define notrace                        __attribute__((no_instrument_function))
+#define notrace                        __attribute__((__no_instrument_function__))
 #endif
 
 /*
@@ -231,22 +179,10 @@ struct ftrace_likely_data {
  * stack and frame pointer being set up and there is no chance to
  * restore the lr register to the value before mcount was called.
  */
-#define __naked                        __attribute__((naked)) notrace
+#define __naked                        __attribute__((__naked__)) notrace
 
 #define __compiler_offsetof(a, b)      __builtin_offsetof(a, b)
 
-/*
- * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
- * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
- * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
- * defined so the gnu89 semantics are the default.
- */
-#ifdef __GNUC_STDC_INLINE__
-# define __gnu_inline  __attribute__((gnu_inline))
-#else
-# define __gnu_inline
-#endif
-
 /*
  * Force always-inline if the user requests it so via the .config.
  * GCC does not warn about unused static inline functions for
@@ -258,22 +194,20 @@ struct ftrace_likely_data {
  * semantics rather than c99. This prevents multiple symbol definition errors
  * of extern inline functions at link time.
  * A lot of inline functions can cause havoc with function tracing.
+ * Do not use __always_inline here, since currently it expands to inline again
+ * (which would break users of __always_inline).
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \
        !defined(CONFIG_OPTIMIZE_INLINING)
-#define inline \
-       inline __attribute__((always_inline, unused)) notrace __gnu_inline
+#define inline inline __attribute__((__always_inline__)) __gnu_inline \
+       __maybe_unused notrace
 #else
-#define inline inline  __attribute__((unused)) notrace __gnu_inline
+#define inline inline                                    __gnu_inline \
+       __maybe_unused notrace
 #endif
 
 #define __inline__ inline
-#define __inline inline
-#define noinline       __attribute__((noinline))
-
-#ifndef __always_inline
-#define __always_inline inline __attribute__((always_inline))
-#endif
+#define __inline   inline
 
 /*
  * Rather then using noinline to prevent stack consumption, use
index caf40ad0bbc6e0f42027aa1c68f3fa5d07477c9a..e0cd2baa83809a8cff419d0a65562c3a80870232 100644 (file)
@@ -126,6 +126,7 @@ enum cpuhp_state {
        CPUHP_AP_MIPS_GIC_TIMER_STARTING,
        CPUHP_AP_ARC_TIMER_STARTING,
        CPUHP_AP_RISCV_TIMER_STARTING,
+       CPUHP_AP_CSKY_TIMER_STARTING,
        CPUHP_AP_KVM_STARTING,
        CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING,
        CPUHP_AP_KVM_ARM_VGIC_STARTING,
index 8252df30b9a16afe53c269b912b47e17edac7b41..c95c0807471fd178c5d943c315fda2aab0f993b8 100644 (file)
@@ -1752,6 +1752,25 @@ struct block_device_operations;
 #define NOMMU_VMFLAGS \
        (NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC)
 
+/*
+ * These flags control the behavior of the remap_file_range function pointer.
+ * If it is called with len == 0 that means "remap to end of source file".
+ * See Documentation/filesystems/vfs.txt for more details about this call.
+ *
+ * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate)
+ * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request
+ */
+#define REMAP_FILE_DEDUP               (1 << 0)
+#define REMAP_FILE_CAN_SHORTEN         (1 << 1)
+
+/*
+ * These flags signal that the caller is ok with altering various aspects of
+ * the behavior of the remap operation.  The changes must be made by the
+ * implementation; the vfs remap helper functions can take advantage of them.
+ * Flags in this category exist to preserve the quirky behavior of the hoisted
+ * btrfs clone/dedupe ioctls.
+ */
+#define REMAP_FILE_ADVISORY            (REMAP_FILE_CAN_SHORTEN)
 
 struct iov_iter;
 
@@ -1790,10 +1809,9 @@ struct file_operations {
 #endif
        ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
                        loff_t, size_t, unsigned int);
-       int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
-                       u64);
-       int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
-                       u64);
+       loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
+                                  struct file *file_out, loff_t pos_out,
+                                  loff_t len, unsigned int remap_flags);
        int (*fadvise)(struct file *, loff_t, loff_t, int);
 } __randomize_layout;
 
@@ -1856,21 +1874,21 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
                unsigned long, loff_t *, rwf_t);
 extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
                                   loff_t, size_t, unsigned int);
-extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
-                                     struct inode *inode_out, loff_t pos_out,
-                                     u64 *len, bool is_dedupe);
-extern int do_clone_file_range(struct file *file_in, loff_t pos_in,
-                              struct file *file_out, loff_t pos_out, u64 len);
-extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
-                               struct file *file_out, loff_t pos_out, u64 len);
-extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
-                                        struct inode *dest, loff_t destoff,
-                                        loff_t len, bool *is_same);
+extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
+                                        struct file *file_out, loff_t pos_out,
+                                        loff_t *count,
+                                        unsigned int remap_flags);
+extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
+                                 struct file *file_out, loff_t pos_out,
+                                 loff_t len, unsigned int remap_flags);
+extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+                                  struct file *file_out, loff_t pos_out,
+                                  loff_t len, unsigned int remap_flags);
 extern int vfs_dedupe_file_range(struct file *file,
                                 struct file_dedupe_range *same);
-extern int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
-                                    struct file *dst_file, loff_t dst_pos,
-                                    u64 len);
+extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
+                                       struct file *dst_file, loff_t dst_pos,
+                                       loff_t len, unsigned int remap_flags);
 
 
 struct super_operations {
@@ -2998,6 +3016,9 @@ extern int sb_min_blocksize(struct super_block *, int);
 extern int generic_file_mmap(struct file *, struct vm_area_struct *);
 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
 extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
+extern int generic_remap_checks(struct file *file_in, loff_t pos_in,
+                               struct file *file_out, loff_t pos_out,
+                               loff_t *count, unsigned int remap_flags);
 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
index 24bcc5eec6b409ec379602156a94d74373ec2633..76f8db0b0e715c016cc00cb95aa9a269f12c075d 100644 (file)
@@ -510,22 +510,18 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
                        struct vm_area_struct *vma, unsigned long addr,
-                       int node, bool hugepage);
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
-       alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
+                       int node);
 #else
 #define alloc_pages(gfp_mask, order) \
                alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
-       alloc_pages(gfp_mask, order)
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
        alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr)                    \
-       alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
+       alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
 #define alloc_page_vma_node(gfp_mask, vma, addr, node)         \
-       alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
+       alloc_pages_vma(gfp_mask, 0, vma, addr, node)
 
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
index 2827b87590d8d55893c87a1593be582994f4a7e5..387c70df6f29cc215f4678d6d19292e1f0b7ff20 100644 (file)
@@ -722,8 +722,8 @@ struct hid_usage_id {
  * input will not be passed to raw_event unless hid_device_io_start is
  * called.
  *
- * raw_event and event should return 0 on no action performed, 1 when no
- * further processing should be done and negative on error
+ * raw_event and event should return negative on error; any other value will
+ * pass the event on to .event(). Typically, return 0 for success.
  *
  * input_mapping shall return a negative value to completely ignore this usage
  * (e.g. doubled or invalid usage), zero to continue with parsing of this
index c759d1cbcedd8d7f19f835457641dee9761a0fa0..a64f21a97369a5a7f8eb35c5a5040d359ffd6681 100644 (file)
@@ -37,7 +37,9 @@ struct in_device {
        unsigned long           mr_v1_seen;
        unsigned long           mr_v2_seen;
        unsigned long           mr_maxdelay;
-       unsigned char           mr_qrv;
+       unsigned long           mr_qi;          /* Query Interval */
+       unsigned long           mr_qri;         /* Query Response Interval */
+       unsigned char           mr_qrv;         /* Query Robustness Variable */
        unsigned char           mr_gq_running;
        unsigned char           mr_ifc_count;
        struct timer_list       mr_gq_timer;    /* general query timer */
index 05d8fb5a06c491076889f57872a1388690e40438..bc9af551fc83821e5bec98e5cbc582b2fe0be07a 100644 (file)
@@ -17,6 +17,9 @@
 
 #ifdef CONFIG_KEYS
 
+struct kernel_pkey_query;
+struct kernel_pkey_params;
+
 /*
  * key under-construction record
  * - passed to the request_key actor if supplied
@@ -155,6 +158,14 @@ struct key_type {
         */
        struct key_restriction *(*lookup_restriction)(const char *params);
 
+       /* Asymmetric key accessor functions. */
+       int (*asym_query)(const struct kernel_pkey_params *params,
+                         struct kernel_pkey_query *info);
+       int (*asym_eds_op)(struct kernel_pkey_params *params,
+                          const void *in, void *out);
+       int (*asym_verify_signature)(struct kernel_pkey_params *params,
+                                    const void *in, const void *in2);
+
        /* internal fields */
        struct list_head        link;           /* link in types list */
        struct lock_class_key   lock_class;     /* key->sem lock class */
diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h
new file mode 100644 (file)
index 0000000..c7c48c7
--- /dev/null
@@ -0,0 +1,46 @@
+/* keyctl kernel bits
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef __LINUX_KEYCTL_H
+#define __LINUX_KEYCTL_H
+
+#include <uapi/linux/keyctl.h>
+
+struct kernel_pkey_query {
+       __u32           supported_ops;  /* Which ops are supported */
+       __u32           key_size;       /* Size of the key in bits */
+       __u16           max_data_size;  /* Maximum size of raw data to sign in bytes */
+       __u16           max_sig_size;   /* Maximum size of signature in bytes */
+       __u16           max_enc_size;   /* Maximum size of encrypted blob in bytes */
+       __u16           max_dec_size;   /* Maximum size of decrypted blob in bytes */
+};
+
+enum kernel_pkey_operation {
+       kernel_pkey_encrypt,
+       kernel_pkey_decrypt,
+       kernel_pkey_sign,
+       kernel_pkey_verify,
+};
+
+struct kernel_pkey_params {
+       struct key      *key;
+       const char      *encoding;      /* Encoding (eg. "oaep" or "raw" for none) */
+       const char      *hash_algo;     /* Digest algorithm used (eg. "sha1") or NULL if N/A */
+       char            *info;          /* Modified info string to be released later */
+       __u32           in_len;         /* Input data size */
+       union {
+               __u32   out_len;        /* Output buffer size (enc/dec/sign) */
+               __u32   in2_len;        /* 2nd input data size (verify) */
+       };
+       enum kernel_pkey_operation op : 8;
+};
+
+#endif /* __LINUX_KEYCTL_H */
index 5228c62af41659bb7d5ae0e7db00969b9f16ef73..bac395f1d00a0f9691b12ec6841f2401a10ca4fc 100644 (file)
@@ -139,6 +139,8 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 struct mempolicy *get_task_policy(struct task_struct *p);
 struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
                unsigned long addr);
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+                                               unsigned long addr);
 bool vma_policy_mof(struct vm_area_struct *vma);
 
 extern void numa_default_policy(void);
index fcf9cc9d535faf54c6b0fa463b6cf09643d0e5c4..5411de93a363e8a14bb980a30c8e5af67f25907e 100644 (file)
@@ -1744,11 +1744,15 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address);
 
 static inline void mm_inc_nr_puds(struct mm_struct *mm)
 {
+       if (mm_pud_folded(mm))
+               return;
        atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_puds(struct mm_struct *mm)
 {
+       if (mm_pud_folded(mm))
+               return;
        atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 #endif
@@ -1768,11 +1772,15 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
 
 static inline void mm_inc_nr_pmds(struct mm_struct *mm)
 {
+       if (mm_pmd_folded(mm))
+               return;
        atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_pmds(struct mm_struct *mm)
 {
+       if (mm_pmd_folded(mm))
+               return;
        atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 #endif
index abe975c87b9003a7301f0e879f6bdad733e6c583..7f53ece2c039aeb849ca929b2b13cb29bd172292 100644 (file)
@@ -324,9 +324,8 @@ static inline unsigned int nanddev_ntargets(const struct nand_device *nand)
  */
 static inline unsigned int nanddev_neraseblocks(const struct nand_device *nand)
 {
-       return (u64)nand->memorg.luns_per_target *
-              nand->memorg.eraseblocks_per_lun *
-              nand->memorg.pages_per_eraseblock;
+       return nand->memorg.ntargets * nand->memorg.luns_per_target *
+              nand->memorg.eraseblocks_per_lun;
 }
 
 /**
@@ -569,7 +568,7 @@ static inline void nanddev_pos_next_eraseblock(struct nand_device *nand,
 }
 
 /**
- * nanddev_pos_next_eraseblock() - Move a position to the next page
+ * nanddev_pos_next_page() - Move a position to the next page
  * @nand: NAND device
  * @pos: the position to update
  *
index dc1d9ed33b3192e9406b17c3107b3235b28ff1b9..857f8abf7b91bc79731873fc8f68e31f6bff4d03 100644 (file)
@@ -3190,6 +3190,26 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
 #endif
 }
 
+/* Variant of netdev_tx_sent_queue() for drivers that are aware
+ * that they should not test BQL status themselves.
+ * We do want to change __QUEUE_STATE_STACK_XOFF only for the last
+ * skb of a batch.
+ * Returns true if the doorbell must be used to kick the NIC.
+ */
+static inline bool __netdev_tx_sent_queue(struct netdev_queue *dev_queue,
+                                         unsigned int bytes,
+                                         bool xmit_more)
+{
+       if (xmit_more) {
+#ifdef CONFIG_BQL
+               dql_queued(&dev_queue->dql, bytes);
+#endif
+               return netif_tx_queue_stopped(dev_queue);
+       }
+       netdev_tx_sent_queue(dev_queue, bytes);
+       return true;
+}
+
 /**
  *     netdev_sent_queue - report the number of bytes queued to hardware
  *     @dev: network device
index 34fc80f3eb900deb8e4c21b10edf8909c469e7b4..1d100efe74ec76861084a4272327b662cf1de478 100644 (file)
@@ -314,7 +314,7 @@ enum {
 extern ip_set_id_t ip_set_get_byname(struct net *net,
                                     const char *name, struct ip_set **set);
 extern void ip_set_put_byindex(struct net *net, ip_set_id_t index);
-extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index);
+extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name);
 extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
 extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index);
 
index 8e2bab1e8e90930f954ec7dc3a1b7a8179eecd13..70877f8de7e919d30716f0483610dfb12eeec433 100644 (file)
@@ -43,11 +43,11 @@ ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
        rcu_assign_pointer(comment->c, c);
 }
 
-/* Used only when dumping a set, protected by rcu_read_lock_bh() */
+/* Used only when dumping a set, protected by rcu_read_lock() */
 static inline int
 ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
 {
-       struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c);
+       struct ip_set_comment_rcu *c = rcu_dereference(comment->c);
 
        if (!c)
                return 0;
index 08f9247e9827e0056eb4d82c5c83a73a19cebd11..9003e29cde4615eb9a9e7785c675d36e1f24b8df 100644 (file)
@@ -119,6 +119,8 @@ static inline int hardlockup_detector_perf_init(void) { return 0; }
 void watchdog_nmi_stop(void);
 void watchdog_nmi_start(void);
 int watchdog_nmi_probe(void);
+int watchdog_nmi_enable(unsigned int cpu);
+void watchdog_nmi_disable(unsigned int cpu);
 
 /**
  * touch_nmi_watchdog - restart NMI watchdog timeout.
index f35c7bf7614302ee51f0896258a3dfc08d5224f8..0096a05395e380a35fe25a6329e05953eb66ab66 100644 (file)
@@ -122,8 +122,7 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
 
 #ifdef CONFIG_TREE_SRCU
 #define _SRCU_NOTIFIER_HEAD(name, mod)                         \
-       static DEFINE_PER_CPU(struct srcu_data,                 \
-                       name##_head_srcu_data);                 \
+       static DEFINE_PER_CPU(struct srcu_data, name##_head_srcu_data); \
        mod struct srcu_notifier_head name =                    \
                        SRCU_NOTIFIER_INIT(name, name##_head_srcu_data)
 
diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
new file mode 100644 (file)
index 0000000..53dfc25
--- /dev/null
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PLATFORM_DATA_X86_ASUS_WMI_H
+#define __PLATFORM_DATA_X86_ASUS_WMI_H
+
+#include <linux/errno.h>
+#include <linux/types.h>
+
+/* WMI Methods */
+#define ASUS_WMI_METHODID_SPEC         0x43455053 /* BIOS SPECification */
+#define ASUS_WMI_METHODID_SFBD         0x44424653 /* Set First Boot Device */
+#define ASUS_WMI_METHODID_GLCD         0x44434C47 /* Get LCD status */
+#define ASUS_WMI_METHODID_GPID         0x44495047 /* Get Panel ID?? (Resol) */
+#define ASUS_WMI_METHODID_QMOD         0x444F4D51 /* Quiet MODe */
+#define ASUS_WMI_METHODID_SPLV         0x4C425053 /* Set Panel Light Value */
+#define ASUS_WMI_METHODID_AGFN         0x4E464741 /* FaN? */
+#define ASUS_WMI_METHODID_SFUN         0x4E554653 /* FUNCtionalities */
+#define ASUS_WMI_METHODID_SDSP         0x50534453 /* Set DiSPlay output */
+#define ASUS_WMI_METHODID_GDSP         0x50534447 /* Get DiSPlay output */
+#define ASUS_WMI_METHODID_DEVP         0x50564544 /* DEVice Policy */
+#define ASUS_WMI_METHODID_OSVR         0x5256534F /* OS VeRsion */
+#define ASUS_WMI_METHODID_DSTS         0x53544344 /* Device STatuS */
+#define ASUS_WMI_METHODID_DSTS2                0x53545344 /* Device STatuS #2*/
+#define ASUS_WMI_METHODID_BSTS         0x53545342 /* Bios STatuS ? */
+#define ASUS_WMI_METHODID_DEVS         0x53564544 /* DEVice Set */
+#define ASUS_WMI_METHODID_CFVS         0x53564643 /* CPU Frequency Volt Set */
+#define ASUS_WMI_METHODID_KBFT         0x5446424B /* KeyBoard FilTer */
+#define ASUS_WMI_METHODID_INIT         0x54494E49 /* INITialize */
+#define ASUS_WMI_METHODID_HKEY         0x59454B48 /* Hot KEY ?? */
+
+#define ASUS_WMI_UNSUPPORTED_METHOD    0xFFFFFFFE
+
+/* Wireless */
+#define ASUS_WMI_DEVID_HW_SWITCH       0x00010001
+#define ASUS_WMI_DEVID_WIRELESS_LED    0x00010002
+#define ASUS_WMI_DEVID_CWAP            0x00010003
+#define ASUS_WMI_DEVID_WLAN            0x00010011
+#define ASUS_WMI_DEVID_WLAN_LED                0x00010012
+#define ASUS_WMI_DEVID_BLUETOOTH       0x00010013
+#define ASUS_WMI_DEVID_GPS             0x00010015
+#define ASUS_WMI_DEVID_WIMAX           0x00010017
+#define ASUS_WMI_DEVID_WWAN3G          0x00010019
+#define ASUS_WMI_DEVID_UWB             0x00010021
+
+/* Leds */
+/* 0x000200XX and 0x000400XX */
+#define ASUS_WMI_DEVID_LED1            0x00020011
+#define ASUS_WMI_DEVID_LED2            0x00020012
+#define ASUS_WMI_DEVID_LED3            0x00020013
+#define ASUS_WMI_DEVID_LED4            0x00020014
+#define ASUS_WMI_DEVID_LED5            0x00020015
+#define ASUS_WMI_DEVID_LED6            0x00020016
+
+/* Backlight and Brightness */
+#define ASUS_WMI_DEVID_ALS_ENABLE      0x00050001 /* Ambient Light Sensor */
+#define ASUS_WMI_DEVID_BACKLIGHT       0x00050011
+#define ASUS_WMI_DEVID_BRIGHTNESS      0x00050012
+#define ASUS_WMI_DEVID_KBD_BACKLIGHT   0x00050021
+#define ASUS_WMI_DEVID_LIGHT_SENSOR    0x00050022 /* ?? */
+#define ASUS_WMI_DEVID_LIGHTBAR                0x00050025
+
+/* Misc */
+#define ASUS_WMI_DEVID_CAMERA          0x00060013
+
+/* Storage */
+#define ASUS_WMI_DEVID_CARDREADER      0x00080013
+
+/* Input */
+#define ASUS_WMI_DEVID_TOUCHPAD                0x00100011
+#define ASUS_WMI_DEVID_TOUCHPAD_LED    0x00100012
+
+/* Fan, Thermal */
+#define ASUS_WMI_DEVID_THERMAL_CTRL    0x00110011
+#define ASUS_WMI_DEVID_FAN_CTRL                0x00110012
+
+/* Power */
+#define ASUS_WMI_DEVID_PROCESSOR_STATE 0x00120012
+
+/* Deep S3 / Resume on LID open */
+#define ASUS_WMI_DEVID_LID_RESUME      0x00120031
+
+/* DSTS masks */
+#define ASUS_WMI_DSTS_STATUS_BIT       0x00000001
+#define ASUS_WMI_DSTS_UNKNOWN_BIT      0x00000002
+#define ASUS_WMI_DSTS_PRESENCE_BIT     0x00010000
+#define ASUS_WMI_DSTS_USER_BIT         0x00020000
+#define ASUS_WMI_DSTS_BIOS_BIT         0x00040000
+#define ASUS_WMI_DSTS_BRIGHTNESS_MASK  0x000000FF
+#define ASUS_WMI_DSTS_MAX_BRIGTH_MASK  0x0000FF00
+#define ASUS_WMI_DSTS_LIGHTBAR_MASK    0x0000000F
+
+#if IS_REACHABLE(CONFIG_ASUS_WMI)
+int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1, u32 *retval);
+#else
+static inline int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1,
+                                          u32 *retval)
+{
+       return -ENODEV;
+}
+#endif
+
+#endif /* __PLATFORM_DATA_X86_ASUS_WMI_H */
index 8f8a5418b627a2db2377add2da367796e572cd2d..a51c13c2b1a0316b00f928bc9e5afaa2ec949d83 100644 (file)
@@ -1200,6 +1200,11 @@ struct task_struct {
        void                            *security;
 #endif
 
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+       unsigned long                   lowest_stack;
+       unsigned long                   prev_lowest_stack;
+#endif
+
        /*
         * New fields for task_struct should be added above here, so that
         * they are included in the randomized portion of task_struct.
diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h
new file mode 100644 (file)
index 0000000..3d5c327
--- /dev/null
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_STACKLEAK_H
+#define _LINUX_STACKLEAK_H
+
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+
+/*
+ * Check that the poison value points to the unused hole in the
+ * virtual memory map for your platform.
+ */
+#define STACKLEAK_POISON -0xBEEF
+#define STACKLEAK_SEARCH_DEPTH 128
+
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+#include <asm/stacktrace.h>
+
+static inline void stackleak_task_init(struct task_struct *t)
+{
+       t->lowest_stack = (unsigned long)end_of_stack(t) + sizeof(unsigned long);
+# ifdef CONFIG_STACKLEAK_METRICS
+       t->prev_lowest_stack = t->lowest_stack;
+# endif
+}
+
+#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
+int stack_erasing_sysctl(struct ctl_table *table, int write,
+                       void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
+
+#else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
+static inline void stackleak_task_init(struct task_struct *t) { }
+#endif
+
+#endif
index 131424cefc6a92381036c099acb3a9833c846506..02c0412e368cc1040212436c424edaa0777f723d 100644 (file)
@@ -107,8 +107,8 @@ struct krb5_ctx {
        u8                      Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */
        u8                      cksum[GSS_KRB5_MAX_KEYLEN];
        s32                     endtime;
-       u32                     seq_send;
-       u64                     seq_send64;
+       atomic_t                seq_send;
+       atomic64_t              seq_send64;
        struct xdr_netobj       mech_used;
        u8                      initiator_sign[GSS_KRB5_MAX_KEYLEN];
        u8                      acceptor_sign[GSS_KRB5_MAX_KEYLEN];
@@ -118,9 +118,6 @@ struct krb5_ctx {
        u8                      acceptor_integ[GSS_KRB5_MAX_KEYLEN];
 };
 
-extern u32 gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx);
-extern u64 gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx);
-
 /* The length of the Kerberos GSS token header */
 #define GSS_KRB5_TOK_HDR_LEN   (16)
 
index 422b1c01ee0de0d679d7f6cb4276bb7d45e82186..55ce99ddb912f9bd603e5031a328eba96e37fc78 100644 (file)
@@ -21,15 +21,16 @@ struct kvec {
        size_t iov_len;
 };
 
-enum {
+enum iter_type {
        ITER_IOVEC = 0,
        ITER_KVEC = 2,
        ITER_BVEC = 4,
        ITER_PIPE = 8,
+       ITER_DISCARD = 16,
 };
 
 struct iov_iter {
-       int type;
+       unsigned int type;
        size_t iov_offset;
        size_t count;
        union {
@@ -47,6 +48,41 @@ struct iov_iter {
        };
 };
 
+static inline enum iter_type iov_iter_type(const struct iov_iter *i)
+{
+       return i->type & ~(READ | WRITE);
+}
+
+static inline bool iter_is_iovec(const struct iov_iter *i)
+{
+       return iov_iter_type(i) == ITER_IOVEC;
+}
+
+static inline bool iov_iter_is_kvec(const struct iov_iter *i)
+{
+       return iov_iter_type(i) == ITER_KVEC;
+}
+
+static inline bool iov_iter_is_bvec(const struct iov_iter *i)
+{
+       return iov_iter_type(i) == ITER_BVEC;
+}
+
+static inline bool iov_iter_is_pipe(const struct iov_iter *i)
+{
+       return iov_iter_type(i) == ITER_PIPE;
+}
+
+static inline bool iov_iter_is_discard(const struct iov_iter *i)
+{
+       return iov_iter_type(i) == ITER_DISCARD;
+}
+
+static inline unsigned char iov_iter_rw(const struct iov_iter *i)
+{
+       return i->type & (READ | WRITE);
+}
+
 /*
  * Total number of bytes covered by an iovec.
  *
@@ -74,7 +110,8 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
 }
 
 #define iov_for_each(iov, iter, start)                         \
-       if (!((start).type & (ITER_BVEC | ITER_PIPE)))          \
+       if (iov_iter_type(start) == ITER_IOVEC ||               \
+           iov_iter_type(start) == ITER_KVEC)                  \
        for (iter = (start);                                    \
             (iter).count &&                                    \
             ((iov = iov_iter_iovec(&(iter))), 1);              \
@@ -181,14 +218,15 @@ size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i)
 size_t iov_iter_zero(size_t bytes, struct iov_iter *);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
-void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
+void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov,
                        unsigned long nr_segs, size_t count);
-void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,
+void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec,
                        unsigned long nr_segs, size_t count);
-void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec,
+void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec,
                        unsigned long nr_segs, size_t count);
-void iov_iter_pipe(struct iov_iter *i, int direction, struct pipe_inode_info *pipe,
+void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe,
                        size_t count);
+void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count);
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
                        size_t maxsize, unsigned maxpages, size_t *start);
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
@@ -202,19 +240,6 @@ static inline size_t iov_iter_count(const struct iov_iter *i)
        return i->count;
 }
 
-static inline bool iter_is_iovec(const struct iov_iter *i)
-{
-       return !(i->type & (ITER_BVEC | ITER_KVEC | ITER_PIPE));
-}
-
-/*
- * Get one of READ or WRITE out of iter->type without any other flags OR'd in
- * with it.
- *
- * The ?: is just for type safety.
- */
-#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & (READ | WRITE))
-
 /*
  * Cap the iov_iter by given limit; note that the second argument is
  * *not* the new size - it's upper limit for such.  Passing it a value
index 738a0c24874f0bfd308fad36057cae71401f71f0..fdfd04e348f698b3d108228868866072164d31b7 100644 (file)
@@ -246,8 +246,7 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
  *
  * @bio is a part of the writeback in progress controlled by @wbc.  Perform
  * writeback specific initialization.  This is used to apply the cgroup
- * writeback context.  Must be called after the bio has been associated with
- * a device.
+ * writeback context.
  */
 static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
 {
@@ -258,7 +257,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
         * regular writeback instead of writing things out itself.
         */
        if (wbc->wb)
-               bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css);
+               bio_associate_blkcg(bio, wbc->wb->blkcg_css);
 }
 
 #else  /* CONFIG_CGROUP_WRITEBACK */
index 14b789a123e7d9240cea72fc01bf0d4d7acdba9b..1656c59784987bd486ace6be1f10705fb47ac5c6 100644 (file)
@@ -317,6 +317,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
                         const struct in6_addr *addr);
 bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
                             const struct in6_addr *addr);
+int ipv6_anycast_init(void);
+void ipv6_anycast_cleanup(void);
 
 /* Device notifier */
 int register_inet6addr_notifier(struct notifier_block *nb);
index e2695c4bf3580323d9df2edd100388cf7db7e256..ddbba838d048df8f7d2d080442b77fa89130b561 100644 (file)
@@ -13,7 +13,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp);
 void unix_gc(void);
 void wait_for_unix_gc(void);
 struct sock *unix_get_socket(struct file *filp);
-struct sock *unix_peer_get(struct sock *);
+struct sock *unix_peer_get(struct sock *sk);
 
 #define UNIX_HASH_SIZE 256
 #define UNIX_HASH_BITS 8
@@ -40,7 +40,7 @@ struct unix_skb_parms {
        u32                     consumed;
 } __randomize_layout;
 
-#define UNIXCB(skb)    (*(struct unix_skb_parms *)&((skb)->cb))
+#define UNIXCB(skb)    (*(struct unix_skb_parms *)&((skb)->cb))
 
 #define unix_state_lock(s)     spin_lock(&unix_sk(s)->lock)
 #define unix_state_unlock(s)   spin_unlock(&unix_sk(s)->lock)
index d7578cf49c3af85f2cd164a0b242d064b25ed23b..c9c78c15bce04eea71172ecad8693eb363bc2d60 100644 (file)
@@ -146,10 +146,12 @@ struct ifacaddr6 {
        struct in6_addr         aca_addr;
        struct fib6_info        *aca_rt;
        struct ifacaddr6        *aca_next;
+       struct hlist_node       aca_addr_lst;
        int                     aca_users;
        refcount_t              aca_refcnt;
        unsigned long           aca_cstamp;
        unsigned long           aca_tstamp;
+       struct rcu_head         rcu;
 };
 
 #define        IFA_HOST        IPV6_ADDR_LOOPBACK
index eed04af9b75e56b6c33d0887cdefa4c8f827251e..ae7b86f587f2c77c5e2e05972d67b070a39b8711 100644 (file)
@@ -153,4 +153,43 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
                               const char *fmt, ...) { }
 #endif /* CONFIG_SYSCTL */
 
+static inline struct nf_generic_net *nf_generic_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.generic;
+}
+
+static inline struct nf_tcp_net *nf_tcp_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.tcp;
+}
+
+static inline struct nf_udp_net *nf_udp_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.udp;
+}
+
+static inline struct nf_icmp_net *nf_icmp_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.icmp;
+}
+
+static inline struct nf_icmp_net *nf_icmpv6_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.icmpv6;
+}
+
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+static inline struct nf_dccp_net *nf_dccp_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.dccp;
+}
+#endif
+
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+static inline struct nf_sctp_net *nf_sctp_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.sctp;
+}
+#endif
+
 #endif /*_NF_CONNTRACK_PROTOCOL_H*/
index d0a341bc45404b06c08f434d26030dd8be3138db..33d291888ba9c1ceb6edc3cbcd9db4467633939e 100644 (file)
@@ -54,6 +54,35 @@ enum afs_fs_operation {
        afs_FS_StoreData64              = 65538, /* AFS Store file data */
        afs_FS_GiveUpAllCallBacks       = 65539, /* AFS Give up all our callbacks on a server */
        afs_FS_GetCapabilities          = 65540, /* AFS Get FS server capabilities */
+
+       yfs_FS_FetchData                = 130,   /* YFS Fetch file data */
+       yfs_FS_FetchACL                 = 64131, /* YFS Fetch file ACL */
+       yfs_FS_FetchStatus              = 64132, /* YFS Fetch file status */
+       yfs_FS_StoreACL                 = 64134, /* YFS Store file ACL */
+       yfs_FS_StoreStatus              = 64135, /* YFS Store file status */
+       yfs_FS_RemoveFile               = 64136, /* YFS Remove a file */
+       yfs_FS_CreateFile               = 64137, /* YFS Create a file */
+       yfs_FS_Rename                   = 64138, /* YFS Rename or move a file or directory */
+       yfs_FS_Symlink                  = 64139, /* YFS Create a symbolic link */
+       yfs_FS_Link                     = 64140, /* YFS Create a hard link */
+       yfs_FS_MakeDir                  = 64141, /* YFS Create a directory */
+       yfs_FS_RemoveDir                = 64142, /* YFS Remove a directory */
+       yfs_FS_GetVolumeStatus          = 64149, /* YFS Get volume status information */
+       yfs_FS_SetVolumeStatus          = 64150, /* YFS Set volume status information */
+       yfs_FS_SetLock                  = 64156, /* YFS Request a file lock */
+       yfs_FS_ExtendLock               = 64157, /* YFS Extend a file lock */
+       yfs_FS_ReleaseLock              = 64158, /* YFS Release a file lock */
+       yfs_FS_Lookup                   = 64161, /* YFS lookup file in directory */
+       yfs_FS_FlushCPS                 = 64165,
+       yfs_FS_FetchOpaqueACL           = 64168,
+       yfs_FS_WhoAmI                   = 64170,
+       yfs_FS_RemoveACL                = 64171,
+       yfs_FS_RemoveFile2              = 64173,
+       yfs_FS_StoreOpaqueACL2          = 64174,
+       yfs_FS_InlineBulkStatus         = 64536, /* YFS Fetch multiple file statuses with errors */
+       yfs_FS_FetchData64              = 64537, /* YFS Fetch file data */
+       yfs_FS_StoreData64              = 64538, /* YFS Store file data */
+       yfs_FS_UpdateSymlink            = 64540,
 };
 
 enum afs_vl_operation {
@@ -84,6 +113,44 @@ enum afs_edit_dir_reason {
        afs_edit_dir_for_unlink,
 };
 
+enum afs_eproto_cause {
+       afs_eproto_bad_status,
+       afs_eproto_cb_count,
+       afs_eproto_cb_fid_count,
+       afs_eproto_file_type,
+       afs_eproto_ibulkst_cb_count,
+       afs_eproto_ibulkst_count,
+       afs_eproto_motd_len,
+       afs_eproto_offline_msg_len,
+       afs_eproto_volname_len,
+       afs_eproto_yvl_fsendpt4_len,
+       afs_eproto_yvl_fsendpt6_len,
+       afs_eproto_yvl_fsendpt_num,
+       afs_eproto_yvl_fsendpt_type,
+       afs_eproto_yvl_vlendpt4_len,
+       afs_eproto_yvl_vlendpt6_len,
+       afs_eproto_yvl_vlendpt_type,
+};
+
+enum afs_io_error {
+       afs_io_error_cm_reply,
+       afs_io_error_extract,
+       afs_io_error_fs_probe_fail,
+       afs_io_error_vl_lookup_fail,
+       afs_io_error_vl_probe_fail,
+};
+
+enum afs_file_error {
+       afs_file_error_dir_bad_magic,
+       afs_file_error_dir_big,
+       afs_file_error_dir_missing_page,
+       afs_file_error_dir_over_end,
+       afs_file_error_dir_small,
+       afs_file_error_dir_unmarked_ext,
+       afs_file_error_mntpt,
+       afs_file_error_writeback_fail,
+};
+
 #endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */
 
 /*
@@ -119,7 +186,34 @@ enum afs_edit_dir_reason {
        EM(afs_FS_FetchData64,                  "FS.FetchData64") \
        EM(afs_FS_StoreData64,                  "FS.StoreData64") \
        EM(afs_FS_GiveUpAllCallBacks,           "FS.GiveUpAllCallBacks") \
-       E_(afs_FS_GetCapabilities,              "FS.GetCapabilities")
+       EM(afs_FS_GetCapabilities,              "FS.GetCapabilities") \
+       EM(yfs_FS_FetchACL,                     "YFS.FetchACL") \
+       EM(yfs_FS_FetchStatus,                  "YFS.FetchStatus") \
+       EM(yfs_FS_StoreACL,                     "YFS.StoreACL") \
+       EM(yfs_FS_StoreStatus,                  "YFS.StoreStatus") \
+       EM(yfs_FS_RemoveFile,                   "YFS.RemoveFile") \
+       EM(yfs_FS_CreateFile,                   "YFS.CreateFile") \
+       EM(yfs_FS_Rename,                       "YFS.Rename") \
+       EM(yfs_FS_Symlink,                      "YFS.Symlink") \
+       EM(yfs_FS_Link,                         "YFS.Link") \
+       EM(yfs_FS_MakeDir,                      "YFS.MakeDir") \
+       EM(yfs_FS_RemoveDir,                    "YFS.RemoveDir") \
+       EM(yfs_FS_GetVolumeStatus,              "YFS.GetVolumeStatus") \
+       EM(yfs_FS_SetVolumeStatus,              "YFS.SetVolumeStatus") \
+       EM(yfs_FS_SetLock,                      "YFS.SetLock") \
+       EM(yfs_FS_ExtendLock,                   "YFS.ExtendLock") \
+       EM(yfs_FS_ReleaseLock,                  "YFS.ReleaseLock") \
+       EM(yfs_FS_Lookup,                       "YFS.Lookup") \
+       EM(yfs_FS_FlushCPS,                     "YFS.FlushCPS") \
+       EM(yfs_FS_FetchOpaqueACL,               "YFS.FetchOpaqueACL") \
+       EM(yfs_FS_WhoAmI,                       "YFS.WhoAmI") \
+       EM(yfs_FS_RemoveACL,                    "YFS.RemoveACL") \
+       EM(yfs_FS_RemoveFile2,                  "YFS.RemoveFile2") \
+       EM(yfs_FS_StoreOpaqueACL2,              "YFS.StoreOpaqueACL2") \
+       EM(yfs_FS_InlineBulkStatus,             "YFS.InlineBulkStatus") \
+       EM(yfs_FS_FetchData64,                  "YFS.FetchData64") \
+       EM(yfs_FS_StoreData64,                  "YFS.StoreData64") \
+       E_(yfs_FS_UpdateSymlink,                "YFS.UpdateSymlink")
 
 #define afs_vl_operations \
        EM(afs_VL_GetEntryByNameU,              "VL.GetEntryByNameU") \
@@ -146,6 +240,40 @@ enum afs_edit_dir_reason {
        EM(afs_edit_dir_for_symlink,            "Symlnk") \
        E_(afs_edit_dir_for_unlink,             "Unlink")
 
+#define afs_eproto_causes                      \
+       EM(afs_eproto_bad_status,       "BadStatus") \
+       EM(afs_eproto_cb_count,         "CbCount") \
+       EM(afs_eproto_cb_fid_count,     "CbFidCount") \
+       EM(afs_eproto_file_type,        "FileTYpe") \
+       EM(afs_eproto_ibulkst_cb_count, "IBS.CbCount") \
+       EM(afs_eproto_ibulkst_count,    "IBS.FidCount") \
+       EM(afs_eproto_motd_len,         "MotdLen") \
+       EM(afs_eproto_offline_msg_len,  "OfflineMsgLen") \
+       EM(afs_eproto_volname_len,      "VolNameLen") \
+       EM(afs_eproto_yvl_fsendpt4_len, "YVL.FsEnd4Len") \
+       EM(afs_eproto_yvl_fsendpt6_len, "YVL.FsEnd6Len") \
+       EM(afs_eproto_yvl_fsendpt_num,  "YVL.FsEndCount") \
+       EM(afs_eproto_yvl_fsendpt_type, "YVL.FsEndType") \
+       EM(afs_eproto_yvl_vlendpt4_len, "YVL.VlEnd4Len") \
+       EM(afs_eproto_yvl_vlendpt6_len, "YVL.VlEnd6Len") \
+       E_(afs_eproto_yvl_vlendpt_type, "YVL.VlEndType")
+
+#define afs_io_errors                                                  \
+       EM(afs_io_error_cm_reply,               "CM_REPLY")             \
+       EM(afs_io_error_extract,                "EXTRACT")              \
+       EM(afs_io_error_fs_probe_fail,          "FS_PROBE_FAIL")        \
+       EM(afs_io_error_vl_lookup_fail,         "VL_LOOKUP_FAIL")       \
+       E_(afs_io_error_vl_probe_fail,          "VL_PROBE_FAIL")
+
+#define afs_file_errors                                                        \
+       EM(afs_file_error_dir_bad_magic,        "DIR_BAD_MAGIC")        \
+       EM(afs_file_error_dir_big,              "DIR_BIG")              \
+       EM(afs_file_error_dir_missing_page,     "DIR_MISSING_PAGE")     \
+       EM(afs_file_error_dir_over_end,         "DIR_ENT_OVER_END")     \
+       EM(afs_file_error_dir_small,            "DIR_SMALL")            \
+       EM(afs_file_error_dir_unmarked_ext,     "DIR_UNMARKED_EXT")     \
+       EM(afs_file_error_mntpt,                "MNTPT_READ_FAILED")    \
+       E_(afs_file_error_writeback_fail,       "WRITEBACK_FAILED")
 
 /*
  * Export enum symbols via userspace.
@@ -160,6 +288,9 @@ afs_fs_operations;
 afs_vl_operations;
 afs_edit_dir_ops;
 afs_edit_dir_reasons;
+afs_eproto_causes;
+afs_io_errors;
+afs_file_errors;
 
 /*
  * Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -170,17 +301,16 @@ afs_edit_dir_reasons;
 #define EM(a, b)       { a, b },
 #define E_(a, b)       { a, b }
 
-TRACE_EVENT(afs_recv_data,
-           TP_PROTO(struct afs_call *call, unsigned count, unsigned offset,
+TRACE_EVENT(afs_receive_data,
+           TP_PROTO(struct afs_call *call, struct iov_iter *iter,
                     bool want_more, int ret),
 
-           TP_ARGS(call, count, offset, want_more, ret),
+           TP_ARGS(call, iter, want_more, ret),
 
            TP_STRUCT__entry(
+                   __field(loff_t,                     remain          )
                    __field(unsigned int,               call            )
                    __field(enum afs_call_state,        state           )
-                   __field(unsigned int,               count           )
-                   __field(unsigned int,               offset          )
                    __field(unsigned short,             unmarshall      )
                    __field(bool,                       want_more       )
                    __field(int,                        ret             )
@@ -190,17 +320,18 @@ TRACE_EVENT(afs_recv_data,
                    __entry->call       = call->debug_id;
                    __entry->state      = call->state;
                    __entry->unmarshall = call->unmarshall;
-                   __entry->count      = count;
-                   __entry->offset     = offset;
+                   __entry->remain     = iov_iter_count(iter);
                    __entry->want_more  = want_more;
                    __entry->ret        = ret;
                           ),
 
-           TP_printk("c=%08x s=%u u=%u %u/%u wm=%u ret=%d",
+           TP_printk("c=%08x r=%llu u=%u w=%u s=%u ret=%d",
                      __entry->call,
-                     __entry->state, __entry->unmarshall,
-                     __entry->offset, __entry->count,
-                     __entry->want_more, __entry->ret)
+                     __entry->remain,
+                     __entry->unmarshall,
+                     __entry->want_more,
+                     __entry->state,
+                     __entry->ret)
            );
 
 TRACE_EVENT(afs_notify_call,
@@ -301,7 +432,7 @@ TRACE_EVENT(afs_make_fs_call,
                    }
                           ),
 
-           TP_printk("c=%08x %06x:%06x:%06x %s",
+           TP_printk("c=%08x %06llx:%06llx:%06x %s",
                      __entry->call,
                      __entry->fid.vid,
                      __entry->fid.vnode,
@@ -555,24 +686,70 @@ TRACE_EVENT(afs_edit_dir,
            );
 
 TRACE_EVENT(afs_protocol_error,
-           TP_PROTO(struct afs_call *call, int error, const void *where),
+           TP_PROTO(struct afs_call *call, int error, enum afs_eproto_cause cause),
+
+           TP_ARGS(call, error, cause),
+
+           TP_STRUCT__entry(
+                   __field(unsigned int,               call            )
+                   __field(int,                        error           )
+                   __field(enum afs_eproto_cause,      cause           )
+                            ),
+
+           TP_fast_assign(
+                   __entry->call = call ? call->debug_id : 0;
+                   __entry->error = error;
+                   __entry->cause = cause;
+                          ),
+
+           TP_printk("c=%08x r=%d %s",
+                     __entry->call, __entry->error,
+                     __print_symbolic(__entry->cause, afs_eproto_causes))
+           );
+
+TRACE_EVENT(afs_io_error,
+           TP_PROTO(unsigned int call, int error, enum afs_io_error where),
 
            TP_ARGS(call, error, where),
 
            TP_STRUCT__entry(
                    __field(unsigned int,       call            )
                    __field(int,                error           )
-                   __field(const void *,       where           )
+                   __field(enum afs_io_error,  where           )
                             ),
 
            TP_fast_assign(
-                   __entry->call = call ? call->debug_id : 0;
+                   __entry->call = call;
+                   __entry->error = error;
+                   __entry->where = where;
+                          ),
+
+           TP_printk("c=%08x r=%d %s",
+                     __entry->call, __entry->error,
+                     __print_symbolic(__entry->where, afs_io_errors))
+           );
+
+TRACE_EVENT(afs_file_error,
+           TP_PROTO(struct afs_vnode *vnode, int error, enum afs_file_error where),
+
+           TP_ARGS(vnode, error, where),
+
+           TP_STRUCT__entry(
+                   __field_struct(struct afs_fid,      fid             )
+                   __field(int,                        error           )
+                   __field(enum afs_file_error,        where           )
+                            ),
+
+           TP_fast_assign(
+                   __entry->fid = vnode->fid;
                    __entry->error = error;
                    __entry->where = where;
                           ),
 
-           TP_printk("c=%08x r=%d sp=%pSR",
-                     __entry->call, __entry->error, __entry->where)
+           TP_printk("%llx:%llx:%x r=%d %s",
+                     __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
+                     __entry->error,
+                     __print_symbolic(__entry->where, afs_file_errors))
            );
 
 TRACE_EVENT(afs_cm_no_server,
index 0f3cb13db8e93efe9d6319395f21849e896f7a02..f45ee0f69c0c25afaf82c670c82d3fcb26bc8cc2 100644 (file)
 #define KEYCTL_INVALIDATE              21      /* invalidate a key */
 #define KEYCTL_GET_PERSISTENT          22      /* get a user's persistent keyring */
 #define KEYCTL_DH_COMPUTE              23      /* Compute Diffie-Hellman values */
+#define KEYCTL_PKEY_QUERY              24      /* Query public key parameters */
+#define KEYCTL_PKEY_ENCRYPT            25      /* Encrypt a blob using a public key */
+#define KEYCTL_PKEY_DECRYPT            26      /* Decrypt a blob using a public key */
+#define KEYCTL_PKEY_SIGN               27      /* Create a public key signature */
+#define KEYCTL_PKEY_VERIFY             28      /* Verify a public key signature */
 #define KEYCTL_RESTRICT_KEYRING                29      /* Restrict keys allowed to link to a keyring */
 
 /* keyctl structures */
@@ -82,4 +87,29 @@ struct keyctl_kdf_params {
        __u32 __spare[8];
 };
 
+#define KEYCTL_SUPPORTS_ENCRYPT                0x01
+#define KEYCTL_SUPPORTS_DECRYPT                0x02
+#define KEYCTL_SUPPORTS_SIGN           0x04
+#define KEYCTL_SUPPORTS_VERIFY         0x08
+
+struct keyctl_pkey_query {
+       __u32           supported_ops;  /* Which ops are supported */
+       __u32           key_size;       /* Size of the key in bits */
+       __u16           max_data_size;  /* Maximum size of raw data to sign in bytes */
+       __u16           max_sig_size;   /* Maximum size of signature in bytes */
+       __u16           max_enc_size;   /* Maximum size of encrypted blob in bytes */
+       __u16           max_dec_size;   /* Maximum size of decrypted blob in bytes */
+       __u32           __spare[10];
+};
+
+struct keyctl_pkey_params {
+       __s32           key_id;         /* Serial no. of public key to use */
+       __u32           in_len;         /* Input data size */
+       union {
+               __u32           out_len;        /* Output buffer size (encrypt/decrypt/sign) */
+               __u32           in2_len;        /* 2nd input data size (verify) */
+       };
+       __u32           __spare[7];
+};
+
 #endif /*  _LINUX_KEYCTL_H */
index f5ff8a76e208fc45584bb76503860c6fdf6650fd..b01eb502d49c55d04f33cace28a410171239eaf5 100644 (file)
@@ -83,11 +83,11 @@ struct kfd_ioctl_set_cu_mask_args {
 };
 
 struct kfd_ioctl_get_queue_wave_state_args {
-       uint64_t ctl_stack_address;     /* to KFD */
-       uint32_t ctl_stack_used_size;   /* from KFD */
-       uint32_t save_area_used_size;   /* from KFD */
-       uint32_t queue_id;              /* to KFD */
-       uint32_t pad;
+       __u64 ctl_stack_address;        /* to KFD */
+       __u32 ctl_stack_used_size;      /* from KFD */
+       __u32 save_area_used_size;      /* from KFD */
+       __u32 queue_id;                 /* to KFD */
+       __u32 pad;
 };
 
 /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
@@ -255,10 +255,10 @@ struct kfd_hsa_memory_exception_data {
 
 /* hw exception data */
 struct kfd_hsa_hw_exception_data {
-       uint32_t reset_type;
-       uint32_t reset_cause;
-       uint32_t memory_lost;
-       uint32_t gpu_id;
+       __u32 reset_type;
+       __u32 reset_cause;
+       __u32 memory_lost;
+       __u32 gpu_id;
 };
 
 /* Event data */
index 579974b0bf0d8140882ff2a48f96184f0f829456..7de4f1bdaf06a28a7e64fb9d72fba3d42d0032b8 100644 (file)
@@ -1635,8 +1635,8 @@ enum nft_ng_attributes {
        NFTA_NG_MODULUS,
        NFTA_NG_TYPE,
        NFTA_NG_OFFSET,
-       NFTA_NG_SET_NAME,
-       NFTA_NG_SET_ID,
+       NFTA_NG_SET_NAME,       /* deprecated */
+       NFTA_NG_SET_ID,         /* deprecated */
        __NFTA_NG_MAX
 };
 #define NFTA_NG_MAX    (__NFTA_NG_MAX - 1)
index 156ccd089df184853c180a240bfaaaa27774a4fe..1610fdbab98dfc89212ee653a573da8c39bdefe8 100644 (file)
 #include <linux/if_vlan.h>
 #include <linux/if_pppox.h>
 
+#ifndef __KERNEL__
+#include <limits.h> /* for INT_MIN, INT_MAX */
+#endif
+
 /* Bridge Hooks */
 /* After promisc drops, checksum checks. */
 #define NF_BR_PRE_ROUTING      0
index f35eb72739c09e3ad0bd22e279fa4a33119c15f6..9de8780ac8d97568932d3857de3dc2c8e5de2806 100644 (file)
@@ -646,10 +646,12 @@ struct perf_event_mmap_page {
  *
  *   PERF_RECORD_MISC_MMAP_DATA  - PERF_RECORD_MMAP* events
  *   PERF_RECORD_MISC_COMM_EXEC  - PERF_RECORD_COMM event
+ *   PERF_RECORD_MISC_FORK_EXEC  - PERF_RECORD_FORK event (perf internal)
  *   PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events
  */
 #define PERF_RECORD_MISC_MMAP_DATA             (1 << 13)
 #define PERF_RECORD_MISC_COMM_EXEC             (1 << 13)
+#define PERF_RECORD_MISC_FORK_EXEC             (1 << 13)
 #define PERF_RECORD_MISC_SWITCH_OUT            (1 << 13)
 /*
  * These PERF_RECORD_MISC_* flags below are safely reused
index 34dd3d497f2cc52b6742d5bf89fa1e88aa947d57..c81feb373d3ea597a7d2c66ad203ad18ed821189 100644 (file)
@@ -568,6 +568,8 @@ struct sctp_assoc_reset_event {
 
 #define SCTP_ASSOC_CHANGE_DENIED       0x0004
 #define SCTP_ASSOC_CHANGE_FAILED       0x0008
+#define SCTP_STREAM_CHANGE_DENIED      SCTP_ASSOC_CHANGE_DENIED
+#define SCTP_STREAM_CHANGE_FAILED      SCTP_ASSOC_CHANGE_FAILED
 struct sctp_stream_change_event {
        __u16 strchange_type;
        __u16 strchange_flags;
@@ -1151,6 +1153,7 @@ struct sctp_add_streams {
 /* SCTP Stream schedulers */
 enum sctp_sched_type {
        SCTP_SS_FCFS,
+       SCTP_SS_DEFAULT = SCTP_SS_FCFS,
        SCTP_SS_PRIO,
        SCTP_SS_RR,
        SCTP_SS_MAX = SCTP_SS_RR
index 13b8cb563892b7ca66a6268738b452c8428f006c..a1966cd7b6774944a1d818bd823de0253c4a2908 100644 (file)
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST        0 /* Tell before reclaiming pages */
 #define VIRTIO_BALLOON_F_STATS_VQ      1 /* Memory Stats virtqueue */
 #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM        2 /* Deflate balloon on OOM */
+#define VIRTIO_BALLOON_F_FREE_PAGE_HINT        3 /* VQ to report free pages */
+#define VIRTIO_BALLOON_F_PAGE_POISON   4 /* Guest is using page poisoning */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
 
+#define VIRTIO_BALLOON_CMD_ID_STOP     0
+#define VIRTIO_BALLOON_CMD_ID_DONE     1
 struct virtio_balloon_config {
        /* Number of pages host wants Guest to give up. */
        __u32 num_pages;
        /* Number of pages we've actually got in balloon. */
        __u32 actual;
+       /* Free page report command id, readonly by guest */
+       __u32 free_page_report_cmd_id;
+       /* Stores PAGE_POISON if page poisoning is in use */
+       __u32 poison_val;
 };
 
 #define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
index 18803ff76e27808bc8263284bbd089c32877ba05..4969817124a8d7c6b462aeb18f54105d1d49e2e3 100644 (file)
@@ -42,16 +42,12 @@ int xen_setup_shutdown_event(void);
 
 extern unsigned long *xen_contiguous_bitmap;
 
-#ifdef CONFIG_XEN_PV
+#if defined(CONFIG_XEN_PV) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)
 int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
                                unsigned int address_bits,
                                dma_addr_t *dma_handle);
 
 void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
-
-int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
-                 xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,
-                 unsigned int domid, bool no_translate, struct page **pages);
 #else
 static inline int xen_create_contiguous_region(phys_addr_t pstart,
                                               unsigned int order,
@@ -63,7 +59,13 @@ static inline int xen_create_contiguous_region(phys_addr_t pstart,
 
 static inline void xen_destroy_contiguous_region(phys_addr_t pstart,
                                                 unsigned int order) { }
+#endif
 
+#if defined(CONFIG_XEN_PV)
+int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
+                 xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,
+                 unsigned int domid, bool no_translate, struct page **pages);
+#else
 static inline int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
                                xen_pfn_t *pfn, int nr, int *err_ptr,
                                pgprot_t prot,  unsigned int domid,
index 7a63d567fdb571f357910a83cf3398df65876016..7343b3a9bff07d0155fad5ba137daef6db39135f 100644 (file)
@@ -117,6 +117,10 @@ obj-$(CONFIG_HAS_IOMEM) += iomem.o
 obj-$(CONFIG_ZONE_DEVICE) += memremap.o
 obj-$(CONFIG_RSEQ) += rseq.o
 
+obj-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak.o
+KASAN_SANITIZE_stackleak.o := n
+KCOV_INSTRUMENT_stackleak.o := n
+
 $(obj)/configs.o: $(obj)/config_data.h
 
 targets += config_data.gz
index 6377225b208204c1c2d8829a778f50ebaa7d816d..1a796e0799ec4a524aee5c325734f0ab22a410ac 100644 (file)
@@ -553,7 +553,6 @@ bool is_bpf_text_address(unsigned long addr)
 int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
                    char *sym)
 {
-       unsigned long symbol_start, symbol_end;
        struct bpf_prog_aux *aux;
        unsigned int it = 0;
        int ret = -ERANGE;
@@ -566,10 +565,9 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
                if (it++ != symnum)
                        continue;
 
-               bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
                bpf_get_prog_name(aux->prog, sym);
 
-               *value = symbol_start;
+               *value = (unsigned long)aux->prog->bpf_func;
                *type  = BPF_SYM_ELF_TYPE;
 
                ret = 0;
index ccb93277aae2c607e7b6ef079e5432d89ef4a1f6..cf5040fd54344dd798f73464eadb5b9684300f1c 100644 (file)
@@ -2078,6 +2078,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
                info.jited_prog_len = 0;
                info.xlated_prog_len = 0;
                info.nr_jited_ksyms = 0;
+               info.nr_jited_func_lens = 0;
                goto done;
        }
 
@@ -2158,11 +2159,11 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
        }
 
        ulen = info.nr_jited_ksyms;
-       info.nr_jited_ksyms = prog->aux->func_cnt;
+       info.nr_jited_ksyms = prog->aux->func_cnt ? : 1;
        if (info.nr_jited_ksyms && ulen) {
                if (bpf_dump_raw_ok()) {
+                       unsigned long ksym_addr;
                        u64 __user *user_ksyms;
-                       ulong ksym_addr;
                        u32 i;
 
                        /* copy the address of the kernel symbol
@@ -2170,10 +2171,17 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
                         */
                        ulen = min_t(u32, info.nr_jited_ksyms, ulen);
                        user_ksyms = u64_to_user_ptr(info.jited_ksyms);
-                       for (i = 0; i < ulen; i++) {
-                               ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
-                               ksym_addr &= PAGE_MASK;
-                               if (put_user((u64) ksym_addr, &user_ksyms[i]))
+                       if (prog->aux->func_cnt) {
+                               for (i = 0; i < ulen; i++) {
+                                       ksym_addr = (unsigned long)
+                                               prog->aux->func[i]->bpf_func;
+                                       if (put_user((u64) ksym_addr,
+                                                    &user_ksyms[i]))
+                                               return -EFAULT;
+                               }
+                       } else {
+                               ksym_addr = (unsigned long) prog->bpf_func;
+                               if (put_user((u64) ksym_addr, &user_ksyms[0]))
                                        return -EFAULT;
                        }
                } else {
@@ -2182,7 +2190,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
        }
 
        ulen = info.nr_jited_func_lens;
-       info.nr_jited_func_lens = prog->aux->func_cnt;
+       info.nr_jited_func_lens = prog->aux->func_cnt ? : 1;
        if (info.nr_jited_func_lens && ulen) {
                if (bpf_dump_raw_ok()) {
                        u32 __user *user_lens;
@@ -2191,9 +2199,16 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
                        /* copy the JITed image lengths for each function */
                        ulen = min_t(u32, info.nr_jited_func_lens, ulen);
                        user_lens = u64_to_user_ptr(info.jited_func_lens);
-                       for (i = 0; i < ulen; i++) {
-                               func_len = prog->aux->func[i]->jited_len;
-                               if (put_user(func_len, &user_lens[i]))
+                       if (prog->aux->func_cnt) {
+                               for (i = 0; i < ulen; i++) {
+                                       func_len =
+                                               prog->aux->func[i]->jited_len;
+                                       if (put_user(func_len, &user_lens[i]))
+                                               return -EFAULT;
+                               }
+                       } else {
+                               func_len = prog->jited_len;
+                               if (put_user(func_len, &user_lens[0]))
                                        return -EFAULT;
                        }
                } else {
index 171a2c88e77ddd28ea5f9d5ba135d424cd09714e..1971ca325fb4e765be7156299e7f02983687019f 100644 (file)
@@ -2852,10 +2852,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
                regs[BPF_REG_0].type = NOT_INIT;
        } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
                   fn->ret_type == RET_PTR_TO_MAP_VALUE) {
-               if (fn->ret_type == RET_PTR_TO_MAP_VALUE)
-                       regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
-               else
-                       regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
                /* There is no offset yet applied, variable or fixed */
                mark_reg_known_zero(env, regs, BPF_REG_0);
                /* remember map_ptr, so that check_map_access()
@@ -2868,7 +2864,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
                        return -EINVAL;
                }
                regs[BPF_REG_0].map_ptr = meta.map_ptr;
-               regs[BPF_REG_0].id = ++env->id_gen;
+               if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
+                       regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
+               } else {
+                       regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
+                       regs[BPF_REG_0].id = ++env->id_gen;
+               }
        } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
                int id = acquire_reference_state(env, insn_idx);
                if (id < 0)
@@ -3046,7 +3047,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
                        dst_reg->umax_value = umax_ptr;
                        dst_reg->var_off = ptr_reg->var_off;
                        dst_reg->off = ptr_reg->off + smin_val;
-                       dst_reg->range = ptr_reg->range;
+                       dst_reg->raw = ptr_reg->raw;
                        break;
                }
                /* A new variable offset is created.  Note that off_reg->off
@@ -3076,10 +3077,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
                }
                dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
                dst_reg->off = ptr_reg->off;
+               dst_reg->raw = ptr_reg->raw;
                if (reg_is_pkt_pointer(ptr_reg)) {
                        dst_reg->id = ++env->id_gen;
                        /* something was added to pkt_ptr, set range to zero */
-                       dst_reg->range = 0;
+                       dst_reg->raw = 0;
                }
                break;
        case BPF_SUB:
@@ -3108,7 +3110,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
                        dst_reg->var_off = ptr_reg->var_off;
                        dst_reg->id = ptr_reg->id;
                        dst_reg->off = ptr_reg->off - smin_val;
-                       dst_reg->range = ptr_reg->range;
+                       dst_reg->raw = ptr_reg->raw;
                        break;
                }
                /* A new variable offset is created.  If the subtrahend is known
@@ -3134,11 +3136,12 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
                }
                dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
                dst_reg->off = ptr_reg->off;
+               dst_reg->raw = ptr_reg->raw;
                if (reg_is_pkt_pointer(ptr_reg)) {
                        dst_reg->id = ++env->id_gen;
                        /* something was added to pkt_ptr, set range to zero */
                        if (smin_val < 0)
-                               dst_reg->range = 0;
+                               dst_reg->raw = 0;
                }
                break;
        case BPF_AND:
index 8b79318810ad5c63d9e70cd634f6d6bc928659ef..6aaf5dd5383bba294719772bc76b7c1664d54ea9 100644 (file)
@@ -493,7 +493,7 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
 }
 
 /**
- * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss
+ * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
  * @cgrp: the cgroup of interest
  * @ss: the subsystem of interest (%NULL returns @cgrp->self)
  *
@@ -502,8 +502,8 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
  * enabled.  If @ss is associated with the hierarchy @cgrp is on, this
  * function is guaranteed to return non-NULL css.
  */
-static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
-                                                       struct cgroup_subsys *ss)
+static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
+                                               struct cgroup_subsys *ss)
 {
        lockdep_assert_held(&cgroup_mutex);
 
@@ -523,35 +523,6 @@ static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
        return cgroup_css(cgrp, ss);
 }
 
-/**
- * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
- * @cgrp: the cgroup of interest
- * @ss: the subsystem of interest
- *
- * Find and get the effective css of @cgrp for @ss.  The effective css is
- * defined as the matching css of the nearest ancestor including self which
- * has @ss enabled.  If @ss is not mounted on the hierarchy @cgrp is on,
- * the root css is returned, so this function always returns a valid css.
- *
- * The returned css is not guaranteed to be online, and therefore it is the
- * callers responsiblity to tryget a reference for it.
- */
-struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
-                                        struct cgroup_subsys *ss)
-{
-       struct cgroup_subsys_state *css;
-
-       do {
-               css = cgroup_css(cgrp, ss);
-
-               if (css)
-                       return css;
-               cgrp = cgroup_parent(cgrp);
-       } while (cgrp);
-
-       return init_css_set.subsys[ss->id];
-}
-
 /**
  * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
  * @cgrp: the cgroup of interest
@@ -634,11 +605,10 @@ EXPORT_SYMBOL_GPL(of_css);
  *
  * Should be called under cgroup_[tree_]mutex.
  */
-#define for_each_e_css(css, ssid, cgrp)                                            \
-       for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)            \
-               if (!((css) = cgroup_e_css_by_mask(cgrp,                    \
-                                                  cgroup_subsys[(ssid)]))) \
-                       ;                                                   \
+#define for_each_e_css(css, ssid, cgrp)                                        \
+       for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)        \
+               if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \
+                       ;                                               \
                else
 
 /**
@@ -1037,7 +1007,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
                         * @ss is in this hierarchy, so we want the
                         * effective css from @cgrp.
                         */
-                       template[i] = cgroup_e_css_by_mask(cgrp, ss);
+                       template[i] = cgroup_e_css(cgrp, ss);
                } else {
                        /*
                         * @ss is not in this hierarchy, so we don't want
@@ -3054,7 +3024,7 @@ static int cgroup_apply_control(struct cgroup *cgrp)
                return ret;
 
        /*
-        * At this point, cgroup_e_css_by_mask() results reflect the new csses
+        * At this point, cgroup_e_css() results reflect the new csses
         * making the following cgroup_update_dfl_csses() properly update
         * css associations of all tasks in the subtree.
         */
index 108fecc20fc148e66f78be657bc80c22590ff5ca..208481d9109030403357b36ada51074f14f37293 100644 (file)
@@ -20,6 +20,7 @@ CONFIG_PARAVIRT=y
 CONFIG_KVM_GUEST=y
 CONFIG_S390_GUEST=y
 CONFIG_VIRTIO=y
+CONFIG_VIRTIO_MENU=y
 CONFIG_VIRTIO_PCI=y
 CONFIG_VIRTIO_BLK=y
 CONFIG_VIRTIO_CONSOLE=y
index 8c490130c4fb0072838801534948d9ec6b9a285f..84530ab358c37ad876744d5ebda5d277dfb6d065 100644 (file)
@@ -750,7 +750,7 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
        /*
         * Do not update time when cgroup is not active
         */
-       if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
+       if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
                __update_cgrp_time(event->cgrp);
 }
 
index 8f82a3bdcb8feff10a8ce4c8d608a406890b6673..07cddff89c7b6bac3658c8cb41dd32dc64a3cfa4 100644 (file)
@@ -91,6 +91,7 @@
 #include <linux/kcov.h>
 #include <linux/livepatch.h>
 #include <linux/thread_info.h>
+#include <linux/stackleak.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -1926,6 +1927,8 @@ static __latent_entropy struct task_struct *copy_process(
        if (retval)
                goto bad_fork_cleanup_io;
 
+       stackleak_task_init(p);
+
        if (pid != &init_struct_pid) {
                pid = alloc_pid(p->nsproxy->pid_ns_for_children);
                if (IS_ERR(pid)) {
index 6e6d467f3dec57717ffb7cfae9098dedafb02b22..1f0985adf19340fdb9ec02cf9118c035d8f8e346 100644 (file)
@@ -8,7 +8,7 @@
 #include <linux/cpu.h>
 #include <linux/irq.h>
 
-#define IRQ_MATRIX_SIZE        (BITS_TO_LONGS(IRQ_MATRIX_BITS) * sizeof(unsigned long))
+#define IRQ_MATRIX_SIZE        (BITS_TO_LONGS(IRQ_MATRIX_BITS))
 
 struct cpumap {
        unsigned int            available;
index c6a3b6851372c480005d4f053757ba02ad101d8f..35cf0ad29718ffdb0a35dd9c8b2313645c91c6fd 100644 (file)
@@ -25,8 +25,6 @@
 #include <linux/elf.h>
 #include <linux/elfcore.h>
 #include <linux/kernel.h>
-#include <linux/kexec.h>
-#include <linux/slab.h>
 #include <linux/syscalls.h>
 #include <linux/vmalloc.h>
 #include "kexec_internal.h"
index b3a3a1fc499eaf386b3b3e1c51f2100b7226ff37..b0fbf685c77a52ba45dc4c3e2044782080a74953 100644 (file)
@@ -319,16 +319,23 @@ int release_resource(struct resource *old)
 EXPORT_SYMBOL(release_resource);
 
 /**
- * Finds the lowest iomem resource that covers part of [start..end].  The
- * caller must specify start, end, flags, and desc (which may be
+ * Finds the lowest iomem resource that covers part of [@start..@end].  The
+ * caller must specify @start, @end, @flags, and @desc (which may be
  * IORES_DESC_NONE).
  *
- * If a resource is found, returns 0 and *res is overwritten with the part
- * of the resource that's within [start..end]; if none is found, returns
- * -1.
+ * If a resource is found, returns 0 and @*res is overwritten with the part
+ * of the resource that's within [@start..@end]; if none is found, returns
+ * -1 or -EINVAL for other invalid parameters.
  *
  * This function walks the whole tree and not just first level children
  * unless @first_lvl is true.
+ *
+ * @start:     start address of the resource searched for
+ * @end:       end address of same resource
+ * @flags:     flags which the resource must have
+ * @desc:      descriptor the resource must have
+ * @first_lvl: walk only the first level children, if set
+ * @res:       return ptr, if resource found
  */
 static int find_next_iomem_res(resource_size_t start, resource_size_t end,
                               unsigned long flags, unsigned long desc,
@@ -399,6 +406,8 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
  * @flags: I/O resource flags
  * @start: start addr
  * @end: end addr
+ * @arg: function argument for the callback @func
+ * @func: callback function that is called for each qualifying resource area
  *
  * NOTE: For a new descriptor search, define a new IORES_DESC in
  * <linux/ioport.h> and set it in 'desc' of a target resource entry.
index 2e2955a8cf8fe3648a007036dde85320f5834a45..a21ea60219293a0be6cc65ee63918f650b2606e1 100644 (file)
@@ -1561,7 +1561,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 
        /*
         * We may dequeue prev's rt_rq in put_prev_task().
-        * So, we update time before rt_nr_running check.
+        * So, we update time before rt_queued check.
         */
        if (prev->sched_class == &rt_sched_class)
                update_curr_rt(rq);
index 9d74371e4aad86a436c549045be08c2a1ed30853..8d7f15ba59163c3a496f0284c2473a0575d57958 100644 (file)
@@ -1337,7 +1337,7 @@ void sched_init_numa(void)
        int level = 0;
        int i, j, k;
 
-       sched_domains_numa_distance = kzalloc(sizeof(int) * nr_node_ids, GFP_KERNEL);
+       sched_domains_numa_distance = kzalloc(sizeof(int) * (nr_node_ids + 1), GFP_KERNEL);
        if (!sched_domains_numa_distance)
                return;
 
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
new file mode 100644 (file)
index 0000000..e428929
--- /dev/null
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This code fills the used part of the kernel stack with a poison value
+ * before returning to userspace. It's part of the STACKLEAK feature
+ * ported from grsecurity/PaX.
+ *
+ * Author: Alexander Popov <alex.popov@linux.com>
+ *
+ * STACKLEAK reduces the information which kernel stack leak bugs can
+ * reveal and blocks some uninitialized stack variable attacks.
+ */
+
+#include <linux/stackleak.h>
+
+#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
+#include <linux/jump_label.h>
+#include <linux/sysctl.h>
+
+static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass);
+
+int stack_erasing_sysctl(struct ctl_table *table, int write,
+                       void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       int ret = 0;
+       int state = !static_branch_unlikely(&stack_erasing_bypass);
+       int prev_state = state;
+
+       table->data = &state;
+       table->maxlen = sizeof(int);
+       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+       state = !!state;
+       if (ret || !write || state == prev_state)
+               return ret;
+
+       if (state)
+               static_branch_disable(&stack_erasing_bypass);
+       else
+               static_branch_enable(&stack_erasing_bypass);
+
+       pr_warn("stackleak: kernel stack erasing is %s\n",
+                                       state ? "enabled" : "disabled");
+       return ret;
+}
+
+#define skip_erasing() static_branch_unlikely(&stack_erasing_bypass)
+#else
+#define skip_erasing() false
+#endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */
+
+asmlinkage void stackleak_erase(void)
+{
+       /* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
+       unsigned long kstack_ptr = current->lowest_stack;
+       unsigned long boundary = (unsigned long)end_of_stack(current);
+       unsigned int poison_count = 0;
+       const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
+
+       if (skip_erasing())
+               return;
+
+       /* Check that 'lowest_stack' value is sane */
+       if (unlikely(kstack_ptr - boundary >= THREAD_SIZE))
+               kstack_ptr = boundary;
+
+       /* Search for the poison value in the kernel stack */
+       while (kstack_ptr > boundary && poison_count <= depth) {
+               if (*(unsigned long *)kstack_ptr == STACKLEAK_POISON)
+                       poison_count++;
+               else
+                       poison_count = 0;
+
+               kstack_ptr -= sizeof(unsigned long);
+       }
+
+       /*
+        * One 'long int' at the bottom of the thread stack is reserved and
+        * should not be poisoned (see CONFIG_SCHED_STACK_END_CHECK=y).
+        */
+       if (kstack_ptr == boundary)
+               kstack_ptr += sizeof(unsigned long);
+
+#ifdef CONFIG_STACKLEAK_METRICS
+       current->prev_lowest_stack = kstack_ptr;
+#endif
+
+       /*
+        * Now write the poison value to the kernel stack. Start from
+        * 'kstack_ptr' and move up till the new 'boundary'. We assume that
+        * the stack pointer doesn't change when we write poison.
+        */
+       if (on_thread_stack())
+               boundary = current_stack_pointer;
+       else
+               boundary = current_top_of_stack();
+
+       while (kstack_ptr < boundary) {
+               *(unsigned long *)kstack_ptr = STACKLEAK_POISON;
+               kstack_ptr += sizeof(unsigned long);
+       }
+
+       /* Reset the 'lowest_stack' value for the next syscall */
+       current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
+}
+
+void __used stackleak_track_stack(void)
+{
+       /*
+        * N.B. stackleak_erase() fills the kernel stack with the poison value,
+        * which has the register width. That code assumes that the value
+        * of 'lowest_stack' is aligned on the register width boundary.
+        *
+        * That is true for x86 and x86_64 because of the kernel stack
+        * alignment on these platforms (for details, see 'cc_stack_align' in
+        * arch/x86/Makefile). Take care of that when you port STACKLEAK to
+        * new platforms.
+        */
+       unsigned long sp = (unsigned long)&sp;
+
+       /*
+        * Having CONFIG_STACKLEAK_TRACK_MIN_SIZE larger than
+        * STACKLEAK_SEARCH_DEPTH makes the poison search in
+        * stackleak_erase() unreliable. Let's prevent that.
+        */
+       BUILD_BUG_ON(CONFIG_STACKLEAK_TRACK_MIN_SIZE > STACKLEAK_SEARCH_DEPTH);
+
+       if (sp < current->lowest_stack &&
+           sp >= (unsigned long)task_stack_page(current) +
+                                               sizeof(unsigned long)) {
+               current->lowest_stack = sp;
+       }
+}
+EXPORT_SYMBOL(stackleak_track_stack);
index cc02050fd0c493378228eb6960e15449754f1387..5fc724e4e454c3304ecaebe7c868eb622f784eb8 100644 (file)
@@ -66,7 +66,6 @@
 #include <linux/kexec.h>
 #include <linux/bpf.h>
 #include <linux/mount.h>
-#include <linux/pipe_fs_i.h>
 
 #include <linux/uaccess.h>
 #include <asm/processor.h>
@@ -91,7 +90,9 @@
 #ifdef CONFIG_CHR_DEV_SG
 #include <scsi/sg.h>
 #endif
-
+#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
+#include <linux/stackleak.h>
+#endif
 #ifdef CONFIG_LOCKUP_DETECTOR
 #include <linux/nmi.h>
 #endif
@@ -1232,6 +1233,17 @@ static struct ctl_table kern_table[] = {
                .extra1         = &zero,
                .extra2         = &one,
        },
+#endif
+#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
+       {
+               .procname       = "stack_erasing",
+               .data           = NULL,
+               .maxlen         = sizeof(int),
+               .mode           = 0600,
+               .proc_handler   = stack_erasing_sysctl,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
 #endif
        { }
 };
index e3a7f7fd3abc1aaf337b18a1eb268126f4308bd2..ad204cf6d0018ef99c00d4b30e942209b77d890c 100644 (file)
@@ -842,7 +842,7 @@ int get_timespec64(struct timespec64 *ts,
        ts->tv_sec = kts.tv_sec;
 
        /* Zero out the padding for 32 bit systems or in compat mode */
-       if (IS_ENABLED(CONFIG_64BIT_TIME) && (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()))
+       if (IS_ENABLED(CONFIG_64BIT_TIME) && in_compat_syscall())
                kts.tv_nsec &= 0xFFFFFFFFUL;
 
        ts->tv_nsec = kts.tv_nsec;
index fac0ddf8a8e22505749be3064e6b964ba12d4930..2868d85f1fb1d3286984c4727f0519957ac069a9 100644 (file)
@@ -764,9 +764,9 @@ blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
        if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
                return NULL;
 
-       if (!bio->bi_blkg)
+       if (!bio->bi_css)
                return NULL;
-       return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup);
+       return cgroup_get_kernfs_id(bio->bi_css->cgroup);
 }
 #else
 static union kernfs_node_id *
index b0875b327f5c3d5c733f79337bef40d5d600fbd1..c3fd849d4a8f93c27d18713ae1e492faa5d24ec9 100644 (file)
@@ -115,7 +115,7 @@ static int module_trace_bprintk_format_notify(struct notifier_block *self,
  * section, then we need to read the link list pointers. The trick is
  * we pass the address of the string to the seq function just like
  * we do for the kernel core formats. To get back the structure that
- * holds the format, we simply use containerof() and then go to the
+ * holds the format, we simply use container_of() and then go to the
  * next format in the list.
  */
 static const char **
index 3ef15a6683c002bc2c5402b5be8ad07c903021bc..bd30e9398d2a8b2afbc49839c969efd277ae2728 100644 (file)
@@ -535,7 +535,7 @@ int traceprobe_update_arg(struct probe_arg *arg)
                        if (code[1].op != FETCH_OP_IMM)
                                return -EINVAL;
 
-                       tmp = strpbrk("+-", code->data);
+                       tmp = strpbrk(code->data, "+-");
                        if (tmp)
                                c = *tmp;
                        ret = traceprobe_split_symbol_offset(code->data,
index e5222b5fb4fe6c3868c78ee3602518ce835c85e3..923414a246e9e4eb4bd422e8146133cad50db45f 100644 (file)
@@ -974,10 +974,6 @@ static ssize_t map_write(struct file *file, const char __user *buf,
        if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
                goto out;
 
-       ret = sort_idmaps(&new_map);
-       if (ret < 0)
-               goto out;
-
        ret = -EPERM;
        /* Map the lower ids from the parent user namespace to the
         * kernel global id space.
@@ -1004,6 +1000,14 @@ static ssize_t map_write(struct file *file, const char __user *buf,
                e->lower_first = lower_first;
        }
 
+       /*
+        * If we want to use binary search for lookup, this clones the extent
+        * array and sorts both copies.
+        */
+       ret = sort_idmaps(&new_map);
+       if (ret < 0)
+               goto out;
+
        /* Install the map */
        if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) {
                memcpy(map->extent, new_map.extent,
index 8be175df30753c95692007a5d41503838344d9a5..7ebccb5c16377eb77176d0df689e1cfb7af7b36d 100644 (file)
@@ -83,6 +83,7 @@
                        const struct kvec *kvec;                \
                        struct kvec v;                          \
                        iterate_kvec(i, n, v, kvec, skip, (K))  \
+               } else if (unlikely(i->type & ITER_DISCARD)) {  \
                } else {                                        \
                        const struct iovec *iov;                \
                        struct iovec v;                         \
                        }                                       \
                        i->nr_segs -= kvec - i->kvec;           \
                        i->kvec = kvec;                         \
+               } else if (unlikely(i->type & ITER_DISCARD)) {  \
+                       skip += n;                              \
                } else {                                        \
                        const struct iovec *iov;                \
                        struct iovec v;                         \
@@ -428,17 +431,19 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 }
 EXPORT_SYMBOL(iov_iter_fault_in_readable);
 
-void iov_iter_init(struct iov_iter *i, int direction,
+void iov_iter_init(struct iov_iter *i, unsigned int direction,
                        const struct iovec *iov, unsigned long nr_segs,
                        size_t count)
 {
+       WARN_ON(direction & ~(READ | WRITE));
+       direction &= READ | WRITE;
+
        /* It will get better.  Eventually... */
        if (uaccess_kernel()) {
-               direction |= ITER_KVEC;
-               i->type = direction;
+               i->type = ITER_KVEC | direction;
                i->kvec = (struct kvec *)iov;
        } else {
-               i->type = direction;
+               i->type = ITER_IOVEC | direction;
                i->iov = iov;
        }
        i->nr_segs = nr_segs;
@@ -558,7 +563,7 @@ static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
        const char *from = addr;
-       if (unlikely(i->type & ITER_PIPE))
+       if (unlikely(iov_iter_is_pipe(i)))
                return copy_pipe_to_iter(addr, bytes, i);
        if (iter_is_iovec(i))
                might_fault();
@@ -658,7 +663,7 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
        const char *from = addr;
        unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
 
-       if (unlikely(i->type & ITER_PIPE))
+       if (unlikely(iov_iter_is_pipe(i)))
                return copy_pipe_to_iter_mcsafe(addr, bytes, i);
        if (iter_is_iovec(i))
                might_fault();
@@ -692,7 +697,7 @@ EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
        char *to = addr;
-       if (unlikely(i->type & ITER_PIPE)) {
+       if (unlikely(iov_iter_is_pipe(i))) {
                WARN_ON(1);
                return 0;
        }
@@ -712,7 +717,7 @@ EXPORT_SYMBOL(_copy_from_iter);
 bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
 {
        char *to = addr;
-       if (unlikely(i->type & ITER_PIPE)) {
+       if (unlikely(iov_iter_is_pipe(i))) {
                WARN_ON(1);
                return false;
        }
@@ -739,7 +744,7 @@ EXPORT_SYMBOL(_copy_from_iter_full);
 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
        char *to = addr;
-       if (unlikely(i->type & ITER_PIPE)) {
+       if (unlikely(iov_iter_is_pipe(i))) {
                WARN_ON(1);
                return 0;
        }
@@ -773,7 +778,7 @@ EXPORT_SYMBOL(_copy_from_iter_nocache);
 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
 {
        char *to = addr;
-       if (unlikely(i->type & ITER_PIPE)) {
+       if (unlikely(iov_iter_is_pipe(i))) {
                WARN_ON(1);
                return 0;
        }
@@ -794,7 +799,7 @@ EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
 bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
        char *to = addr;
-       if (unlikely(i->type & ITER_PIPE)) {
+       if (unlikely(iov_iter_is_pipe(i))) {
                WARN_ON(1);
                return false;
        }
@@ -836,7 +841,9 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
                size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
                kunmap_atomic(kaddr);
                return wanted;
-       } else if (likely(!(i->type & ITER_PIPE)))
+       } else if (unlikely(iov_iter_is_discard(i)))
+               return bytes;
+       else if (likely(!iov_iter_is_pipe(i)))
                return copy_page_to_iter_iovec(page, offset, bytes, i);
        else
                return copy_page_to_iter_pipe(page, offset, bytes, i);
@@ -848,7 +855,7 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 {
        if (unlikely(!page_copy_sane(page, offset, bytes)))
                return 0;
-       if (unlikely(i->type & ITER_PIPE)) {
+       if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
                WARN_ON(1);
                return 0;
        }
@@ -888,7 +895,7 @@ static size_t pipe_zero(size_t bytes, struct iov_iter *i)
 
 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
 {
-       if (unlikely(i->type & ITER_PIPE))
+       if (unlikely(iov_iter_is_pipe(i)))
                return pipe_zero(bytes, i);
        iterate_and_advance(i, bytes, v,
                clear_user(v.iov_base, v.iov_len),
@@ -908,7 +915,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
                kunmap_atomic(kaddr);
                return 0;
        }
-       if (unlikely(i->type & ITER_PIPE)) {
+       if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
                kunmap_atomic(kaddr);
                WARN_ON(1);
                return 0;
@@ -972,10 +979,14 @@ static void pipe_advance(struct iov_iter *i, size_t size)
 
 void iov_iter_advance(struct iov_iter *i, size_t size)
 {
-       if (unlikely(i->type & ITER_PIPE)) {
+       if (unlikely(iov_iter_is_pipe(i))) {
                pipe_advance(i, size);
                return;
        }
+       if (unlikely(iov_iter_is_discard(i))) {
+               i->count -= size;
+               return;
+       }
        iterate_and_advance(i, size, v, 0, 0, 0)
 }
 EXPORT_SYMBOL(iov_iter_advance);
@@ -987,7 +998,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
        if (WARN_ON(unroll > MAX_RW_COUNT))
                return;
        i->count += unroll;
-       if (unlikely(i->type & ITER_PIPE)) {
+       if (unlikely(iov_iter_is_pipe(i))) {
                struct pipe_inode_info *pipe = i->pipe;
                int idx = i->idx;
                size_t off = i->iov_offset;
@@ -1011,12 +1022,14 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
                pipe_truncate(i);
                return;
        }
+       if (unlikely(iov_iter_is_discard(i)))
+               return;
        if (unroll <= i->iov_offset) {
                i->iov_offset -= unroll;
                return;
        }
        unroll -= i->iov_offset;
-       if (i->type & ITER_BVEC) {
+       if (iov_iter_is_bvec(i)) {
                const struct bio_vec *bvec = i->bvec;
                while (1) {
                        size_t n = (--bvec)->bv_len;
@@ -1049,23 +1062,25 @@ EXPORT_SYMBOL(iov_iter_revert);
  */
 size_t iov_iter_single_seg_count(const struct iov_iter *i)
 {
-       if (unlikely(i->type & ITER_PIPE))
+       if (unlikely(iov_iter_is_pipe(i)))
                return i->count;        // it is a silly place, anyway
        if (i->nr_segs == 1)
                return i->count;
-       else if (i->type & ITER_BVEC)
+       if (unlikely(iov_iter_is_discard(i)))
+               return i->count;
+       else if (iov_iter_is_bvec(i))
                return min(i->count, i->bvec->bv_len - i->iov_offset);
        else
                return min(i->count, i->iov->iov_len - i->iov_offset);
 }
 EXPORT_SYMBOL(iov_iter_single_seg_count);
 
-void iov_iter_kvec(struct iov_iter *i, int direction,
+void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
                        const struct kvec *kvec, unsigned long nr_segs,
                        size_t count)
 {
-       BUG_ON(!(direction & ITER_KVEC));
-       i->type = direction;
+       WARN_ON(direction & ~(READ | WRITE));
+       i->type = ITER_KVEC | (direction & (READ | WRITE));
        i->kvec = kvec;
        i->nr_segs = nr_segs;
        i->iov_offset = 0;
@@ -1073,12 +1088,12 @@ void iov_iter_kvec(struct iov_iter *i, int direction,
 }
 EXPORT_SYMBOL(iov_iter_kvec);
 
-void iov_iter_bvec(struct iov_iter *i, int direction,
+void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
                        const struct bio_vec *bvec, unsigned long nr_segs,
                        size_t count)
 {
-       BUG_ON(!(direction & ITER_BVEC));
-       i->type = direction;
+       WARN_ON(direction & ~(READ | WRITE));
+       i->type = ITER_BVEC | (direction & (READ | WRITE));
        i->bvec = bvec;
        i->nr_segs = nr_segs;
        i->iov_offset = 0;
@@ -1086,13 +1101,13 @@ void iov_iter_bvec(struct iov_iter *i, int direction,
 }
 EXPORT_SYMBOL(iov_iter_bvec);
 
-void iov_iter_pipe(struct iov_iter *i, int direction,
+void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
                        struct pipe_inode_info *pipe,
                        size_t count)
 {
-       BUG_ON(direction != ITER_PIPE);
+       BUG_ON(direction != READ);
        WARN_ON(pipe->nrbufs == pipe->buffers);
-       i->type = direction;
+       i->type = ITER_PIPE | READ;
        i->pipe = pipe;
        i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
        i->iov_offset = 0;
@@ -1101,12 +1116,30 @@ void iov_iter_pipe(struct iov_iter *i, int direction,
 }
 EXPORT_SYMBOL(iov_iter_pipe);
 
+/**
+ * iov_iter_discard - Initialise an I/O iterator that discards data
+ * @i: The iterator to initialise.
+ * @direction: The direction of the transfer.
+ * @count: The size of the I/O buffer in bytes.
+ *
+ * Set up an I/O iterator that just discards everything that's written to it.
+ * It's only available as a READ iterator.
+ */
+void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
+{
+       BUG_ON(direction != READ);
+       i->type = ITER_DISCARD | READ;
+       i->count = count;
+       i->iov_offset = 0;
+}
+EXPORT_SYMBOL(iov_iter_discard);
+
 unsigned long iov_iter_alignment(const struct iov_iter *i)
 {
        unsigned long res = 0;
        size_t size = i->count;
 
-       if (unlikely(i->type & ITER_PIPE)) {
+       if (unlikely(iov_iter_is_pipe(i))) {
                if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
                        return size | i->iov_offset;
                return size;
@@ -1125,7 +1158,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
        unsigned long res = 0;
        size_t size = i->count;
 
-       if (unlikely(i->type & ITER_PIPE)) {
+       if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
                WARN_ON(1);
                return ~0U;
        }
@@ -1193,8 +1226,11 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
        if (maxsize > i->count)
                maxsize = i->count;
 
-       if (unlikely(i->type & ITER_PIPE))
+       if (unlikely(iov_iter_is_pipe(i)))
                return pipe_get_pages(i, pages, maxsize, maxpages, start);
+       if (unlikely(iov_iter_is_discard(i)))
+               return -EFAULT;
+
        iterate_all_kinds(i, maxsize, v, ({
                unsigned long addr = (unsigned long)v.iov_base;
                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -1205,7 +1241,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
                        len = maxpages * PAGE_SIZE;
                addr &= ~(PAGE_SIZE - 1);
                n = DIV_ROUND_UP(len, PAGE_SIZE);
-               res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
+               res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, pages);
                if (unlikely(res < 0))
                        return res;
                return (res == n ? len : res * PAGE_SIZE) - *start;
@@ -1270,8 +1306,11 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
        if (maxsize > i->count)
                maxsize = i->count;
 
-       if (unlikely(i->type & ITER_PIPE))
+       if (unlikely(iov_iter_is_pipe(i)))
                return pipe_get_pages_alloc(i, pages, maxsize, start);
+       if (unlikely(iov_iter_is_discard(i)))
+               return -EFAULT;
+
        iterate_all_kinds(i, maxsize, v, ({
                unsigned long addr = (unsigned long)v.iov_base;
                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -1283,7 +1322,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
                p = get_pages_array(n);
                if (!p)
                        return -ENOMEM;
-               res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
+               res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, p);
                if (unlikely(res < 0)) {
                        kvfree(p);
                        return res;
@@ -1313,7 +1352,7 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
        __wsum sum, next;
        size_t off = 0;
        sum = *csum;
-       if (unlikely(i->type & ITER_PIPE)) {
+       if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
                WARN_ON(1);
                return 0;
        }
@@ -1355,7 +1394,7 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
        __wsum sum, next;
        size_t off = 0;
        sum = *csum;
-       if (unlikely(i->type & ITER_PIPE)) {
+       if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
                WARN_ON(1);
                return false;
        }
@@ -1400,7 +1439,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
        __wsum sum, next;
        size_t off = 0;
        sum = *csum;
-       if (unlikely(i->type & ITER_PIPE)) {
+       if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
                WARN_ON(1);     /* for now */
                return 0;
        }
@@ -1442,8 +1481,10 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
 
        if (!size)
                return 0;
+       if (unlikely(iov_iter_is_discard(i)))
+               return 0;
 
-       if (unlikely(i->type & ITER_PIPE)) {
+       if (unlikely(iov_iter_is_pipe(i))) {
                struct pipe_inode_info *pipe = i->pipe;
                size_t off;
                int idx;
@@ -1481,11 +1522,13 @@ EXPORT_SYMBOL(iov_iter_npages);
 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 {
        *new = *old;
-       if (unlikely(new->type & ITER_PIPE)) {
+       if (unlikely(iov_iter_is_pipe(new))) {
                WARN_ON(1);
                return NULL;
        }
-       if (new->type & ITER_BVEC)
+       if (unlikely(iov_iter_is_discard(new)))
+               return NULL;
+       if (iov_iter_is_bvec(new))
                return new->bvec = kmemdup(new->bvec,
                                    new->nr_segs * sizeof(struct bio_vec),
                                    flags);
index 5d73f5cb4d8a78f0887cc6cfbf10a29ec5d5f51c..79777645cac9c1243518f4f4bf403cdc567aa9ea 100644 (file)
@@ -27,7 +27,7 @@ ifeq ($(ARCH),arm)
         CFLAGS += -I../../../arch/arm/include -mfpu=neon
         HAS_NEON = yes
 endif
-ifeq ($(ARCH),arm64)
+ifeq ($(ARCH),aarch64)
         CFLAGS += -I../../../arch/arm64/include
         HAS_NEON = yes
 endif
@@ -41,7 +41,7 @@ ifeq ($(IS_X86),yes)
                    gcc -c -x assembler - >&/dev/null &&        \
                    rm ./-.o && echo -DCONFIG_AS_AVX512=1)
 else ifeq ($(HAS_NEON),yes)
-        OBJS   += neon.o neon1.o neon2.o neon4.o neon8.o
+        OBJS   += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
         CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
 else
         HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
index 218d0b2ec82d1534dcb66b4744f886d7d0262d55..81adec8ee02cc3bdb765625e28c3d765f203e512 100644 (file)
@@ -2049,7 +2049,7 @@ find_page:
                                        !mapping->a_ops->is_partially_uptodate)
                                goto page_not_up_to_date;
                        /* pipes can't handle partially uptodate pages */
-                       if (unlikely(iter->type & ITER_PIPE))
+                       if (unlikely(iov_iter_is_pipe(iter)))
                                goto page_not_up_to_date;
                        if (!trylock_page(page))
                                goto page_not_up_to_date;
@@ -2824,6 +2824,42 @@ struct page *read_cache_page_gfp(struct address_space *mapping,
 }
 EXPORT_SYMBOL(read_cache_page_gfp);
 
+/*
+ * Don't operate on ranges the page cache doesn't support, and don't exceed the
+ * LFS limits.  If pos is under the limit it becomes a short access.  If it
+ * exceeds the limit we return -EFBIG.
+ */
+static int generic_access_check_limits(struct file *file, loff_t pos,
+                                      loff_t *count)
+{
+       struct inode *inode = file->f_mapping->host;
+       loff_t max_size = inode->i_sb->s_maxbytes;
+
+       if (!(file->f_flags & O_LARGEFILE))
+               max_size = MAX_NON_LFS;
+
+       if (unlikely(pos >= max_size))
+               return -EFBIG;
+       *count = min(*count, max_size - pos);
+       return 0;
+}
+
+static int generic_write_check_limits(struct file *file, loff_t pos,
+                                     loff_t *count)
+{
+       loff_t limit = rlimit(RLIMIT_FSIZE);
+
+       if (limit != RLIM_INFINITY) {
+               if (pos >= limit) {
+                       send_sig(SIGXFSZ, current, 0);
+                       return -EFBIG;
+               }
+               *count = min(*count, limit - pos);
+       }
+
+       return generic_access_check_limits(file, pos, count);
+}
+
 /*
  * Performs necessary checks before doing a write
  *
@@ -2835,8 +2871,8 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
-       unsigned long limit = rlimit(RLIMIT_FSIZE);
-       loff_t pos;
+       loff_t count;
+       int ret;
 
        if (!iov_iter_count(from))
                return 0;
@@ -2845,43 +2881,99 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
        if (iocb->ki_flags & IOCB_APPEND)
                iocb->ki_pos = i_size_read(inode);
 
-       pos = iocb->ki_pos;
-
        if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
                return -EINVAL;
 
-       if (limit != RLIM_INFINITY) {
-               if (iocb->ki_pos >= limit) {
-                       send_sig(SIGXFSZ, current, 0);
-                       return -EFBIG;
-               }
-               iov_iter_truncate(from, limit - (unsigned long)pos);
-       }
+       count = iov_iter_count(from);
+       ret = generic_write_check_limits(file, iocb->ki_pos, &count);
+       if (ret)
+               return ret;
+
+       iov_iter_truncate(from, count);
+       return iov_iter_count(from);
+}
+EXPORT_SYMBOL(generic_write_checks);
+
+/*
+ * Performs necessary checks before doing a clone.
+ *
+ * Can adjust amount of bytes to clone.
+ * Returns appropriate error code that caller should return or
+ * zero in case the clone should be allowed.
+ */
+int generic_remap_checks(struct file *file_in, loff_t pos_in,
+                        struct file *file_out, loff_t pos_out,
+                        loff_t *req_count, unsigned int remap_flags)
+{
+       struct inode *inode_in = file_in->f_mapping->host;
+       struct inode *inode_out = file_out->f_mapping->host;
+       uint64_t count = *req_count;
+       uint64_t bcount;
+       loff_t size_in, size_out;
+       loff_t bs = inode_out->i_sb->s_blocksize;
+       int ret;
+
+       /* The start of both ranges must be aligned to an fs block. */
+       if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs))
+               return -EINVAL;
+
+       /* Ensure offsets don't wrap. */
+       if (pos_in + count < pos_in || pos_out + count < pos_out)
+               return -EINVAL;
+
+       size_in = i_size_read(inode_in);
+       size_out = i_size_read(inode_out);
+
+       /* Dedupe requires both ranges to be within EOF. */
+       if ((remap_flags & REMAP_FILE_DEDUP) &&
+           (pos_in >= size_in || pos_in + count > size_in ||
+            pos_out >= size_out || pos_out + count > size_out))
+               return -EINVAL;
+
+       /* Ensure the infile range is within the infile. */
+       if (pos_in >= size_in)
+               return -EINVAL;
+       count = min(count, size_in - (uint64_t)pos_in);
+
+       ret = generic_access_check_limits(file_in, pos_in, &count);
+       if (ret)
+               return ret;
+
+       ret = generic_write_check_limits(file_out, pos_out, &count);
+       if (ret)
+               return ret;
 
        /*
-        * LFS rule
+        * If the user wanted us to link to the infile's EOF, round up to the
+        * next block boundary for this check.
+        *
+        * Otherwise, make sure the count is also block-aligned, having
+        * already confirmed the starting offsets' block alignment.
         */
-       if (unlikely(pos + iov_iter_count(from) > MAX_NON_LFS &&
-                               !(file->f_flags & O_LARGEFILE))) {
-               if (pos >= MAX_NON_LFS)
-                       return -EFBIG;
-               iov_iter_truncate(from, MAX_NON_LFS - (unsigned long)pos);
+       if (pos_in + count == size_in) {
+               bcount = ALIGN(size_in, bs) - pos_in;
+       } else {
+               if (!IS_ALIGNED(count, bs))
+                       count = ALIGN_DOWN(count, bs);
+               bcount = count;
        }
 
+       /* Don't allow overlapped cloning within the same file. */
+       if (inode_in == inode_out &&
+           pos_out + bcount > pos_in &&
+           pos_out < pos_in + bcount)
+               return -EINVAL;
+
        /*
-        * Are we about to exceed the fs block limit ?
-        *
-        * If we have written data it becomes a short write.  If we have
-        * exceeded without writing data we send a signal and return EFBIG.
-        * Linus frestrict idea will clean these up nicely..
+        * We shortened the request but the caller can't deal with that, so
+        * bounce the request back to userspace.
         */
-       if (unlikely(pos >= inode->i_sb->s_maxbytes))
-               return -EFBIG;
+       if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN))
+               return -EINVAL;
 
-       iov_iter_truncate(from, inode->i_sb->s_maxbytes - pos);
-       return iov_iter_count(from);
+       *req_count = count;
+       return 0;
 }
-EXPORT_SYMBOL(generic_write_checks);
 
 int pagecache_write_begin(struct file *file, struct address_space *mapping,
                                loff_t pos, unsigned len, unsigned flags,
index 4e4ef8fa479d53b7ee7c4c8fcb86985acb790c8a..55478ab3c83be372f9fa4d654f16e32ffdeb1e29 100644 (file)
@@ -629,21 +629,40 @@ release:
  *         available
  * never: never stall for any thp allocation
  */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
 {
        const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
+       gfp_t this_node = 0;
+
+#ifdef CONFIG_NUMA
+       struct mempolicy *pol;
+       /*
+        * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
+        * specified, to express a general desire to stay on the current
+        * node for optimistic allocation attempts. If the defrag mode
+        * and/or madvise hint requires the direct reclaim then we prefer
+        * to fallback to other node rather than node reclaim because that
+        * can lead to excessive reclaim even though there is free memory
+        * on other nodes. We expect that NUMA preferences are specified
+        * by memory policies.
+        */
+       pol = get_vma_policy(vma, addr);
+       if (pol->mode != MPOL_BIND)
+               this_node = __GFP_THISNODE;
+       mpol_cond_put(pol);
+#endif
 
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
                return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-               return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
+               return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
                return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-                                                            __GFP_KSWAPD_RECLAIM);
+                                                            __GFP_KSWAPD_RECLAIM | this_node);
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
                return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-                                                            0);
-       return GFP_TRANSHUGE_LIGHT;
+                                                            this_node);
+       return GFP_TRANSHUGE_LIGHT | this_node;
 }
 
 /* Caller must hold page table lock. */
@@ -715,8 +734,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
                        pte_free(vma->vm_mm, pgtable);
                return ret;
        }
-       gfp = alloc_hugepage_direct_gfpmask(vma);
-       page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+       gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+       page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
        if (unlikely(!page)) {
                count_vm_event(THP_FAULT_FALLBACK);
                return VM_FAULT_FALLBACK;
@@ -1286,8 +1305,9 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 alloc:
        if (transparent_hugepage_enabled(vma) &&
            !transparent_hugepage_debug_cow()) {
-               huge_gfp = alloc_hugepage_direct_gfpmask(vma);
-               new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
+               huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+               new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
+                               haddr, numa_node_id());
        } else
                new_page = NULL;
 
index 54920cbc46bfdcb87b0a4ae3e6b4538596b6bd58..6e1469b80cb7d57d3cdf01ef9c917a21ece0b7fa 100644 (file)
@@ -2593,7 +2593,7 @@ int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
        struct mem_cgroup *memcg;
        int ret = 0;
 
-       if (memcg_kmem_bypass())
+       if (mem_cgroup_disabled() || memcg_kmem_bypass())
                return 0;
 
        memcg = get_mem_cgroup_from_current();
index 61972da38d93cb54d6f1088df186ed20bb0f98bb..2b2b3ccbbfb5768a3d6b530799ebf5c4c3129688 100644 (file)
@@ -586,6 +586,7 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
        for (i = 0; i < sections_to_remove; i++) {
                unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
 
+               cond_resched();
                ret = __remove_section(zone, __pfn_to_section(pfn), map_offset,
                                altmap);
                map_offset = 0;
index cfd26d7e61a17f9c5fd260b85778058aa04b83e2..5837a067124d895f38f6039d9e3739f0a0874fc0 100644 (file)
@@ -1116,8 +1116,8 @@ static struct page *new_page(struct page *page, unsigned long start)
        } else if (PageTransHuge(page)) {
                struct page *thp;
 
-               thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
-                                        HPAGE_PMD_ORDER);
+               thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
+                               address, numa_node_id());
                if (!thp)
                        return NULL;
                prep_transhuge_page(thp);
@@ -1662,7 +1662,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
  * freeing by another task.  It is the caller's responsibility to free the
  * extra reference for shared policies.
  */
-static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
                                                unsigned long addr)
 {
        struct mempolicy *pol = __get_vma_policy(vma, addr);
@@ -2011,7 +2011,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  *     @vma:  Pointer to VMA or NULL if not available.
  *     @addr: Virtual Address of the allocation. Must be inside the VMA.
  *     @node: Which node to prefer for allocation (modulo policy).
- *     @hugepage: for hugepages try only the preferred node if possible
  *
  *     This function allocates a page from the kernel page pool and applies
  *     a NUMA policy associated with the VMA or the current process.
@@ -2022,7 +2021,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-               unsigned long addr, int node, bool hugepage)
+               unsigned long addr, int node)
 {
        struct mempolicy *pol;
        struct page *page;
@@ -2040,32 +2039,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
                goto out;
        }
 
-       if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
-               int hpage_node = node;
-
-               /*
-                * For hugepage allocation and non-interleave policy which
-                * allows the current node (or other explicitly preferred
-                * node) we only try to allocate from the current/preferred
-                * node and don't fall back to other nodes, as the cost of
-                * remote accesses would likely offset THP benefits.
-                *
-                * If the policy is interleave, or does not allow the current
-                * node in its nodemask, we allocate the standard way.
-                */
-               if (pol->mode == MPOL_PREFERRED &&
-                                               !(pol->flags & MPOL_F_LOCAL))
-                       hpage_node = pol->v.preferred_node;
-
-               nmask = policy_nodemask(gfp, pol);
-               if (!nmask || node_isset(hpage_node, *nmask)) {
-                       mpol_cond_put(pol);
-                       page = __alloc_pages_node(hpage_node,
-                                               gfp | __GFP_THISNODE, order);
-                       goto out;
-               }
-       }
-
        nmask = policy_nodemask(gfp, pol);
        preferred_nid = policy_node(gfp, pol, node);
        page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
index a451ffa9491ca0c84d3932fb5af1d02ca18f8f62..d4d1c89bcdddcef43dfa04fa02926f21fba228fb 100644 (file)
@@ -294,7 +294,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
                };
                struct iov_iter from;
 
-               iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE);
+               iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE);
                init_sync_kiocb(&kiocb, swap_file);
                kiocb.ki_pos = page_file_offset(page);
 
@@ -339,7 +339,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
                goto out;
        }
        bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
-       bio_associate_blkg_from_page(bio, page);
+       bio_associate_blkcg_from_page(bio, page);
        count_swpout_vm_event(page);
        set_page_writeback(page);
        unlock_page(page);
index f7e2a676365a10256368d3fe9e5dfccfbd5cc17e..f0c15e9017c02236e56cb71948d992c584226d0c 100644 (file)
@@ -17,6 +17,11 @@ static int __init early_page_poison_param(char *buf)
 }
 early_param("page_poison", early_page_poison_param);
 
+/**
+ * page_poisoning_enabled - check if page poisoning is enabled
+ *
+ * Return true if page poisoning is enabled, or false if not.
+ */
 bool page_poisoning_enabled(void)
 {
        /*
@@ -29,6 +34,7 @@ bool page_poisoning_enabled(void)
                (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) &&
                debug_pagealloc_enabled()));
 }
+EXPORT_SYMBOL_GPL(page_poisoning_enabled);
 
 static void poison_page(struct page *page)
 {
index a6b74c6fe0becd3ef42284aa643933ae4fb7f0cf..db86282fd024580cbf5c41f01cb6d5447a9e1791 100644 (file)
@@ -2591,7 +2591,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
        BUG_ON(ai->nr_groups != 1);
        upa = ai->alloc_size/ai->unit_size;
        nr_g0_units = roundup(num_possible_cpus(), upa);
-       if (unlikely(WARN_ON(ai->groups[0].nr_units != nr_g0_units))) {
+       if (WARN_ON(ai->groups[0].nr_units != nr_g0_units)) {
                pcpu_free_alloc_info(ai);
                return -EINVAL;
        }
index 56bf122e0bb4ddf7b57548e7e4b4a33bbdf9a9ab..ea26d7a0342d77ac67f47e813a73f125c873a1e5 100644 (file)
@@ -1435,7 +1435,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 
        shmem_pseudo_vma_init(&pvma, info, hindex);
        page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-                       HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
+                       HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
        shmem_pseudo_vma_destroy(&pvma);
        if (page)
                prep_transhuge_page(page);
index 5f23e18eecc02f32ac80ddd85658006f9ad15430..2c9a17b9b46bb344897691d5f53c94a90e25c3c8 100644 (file)
@@ -2066,7 +2066,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
        struct kvec kv = {.iov_base = data, .iov_len = count};
        struct iov_iter to;
 
-       iov_iter_kvec(&to, READ | ITER_KVEC, &kv, 1, count);
+       iov_iter_kvec(&to, READ, &kv, 1, count);
 
        p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
                                fid->fid, (unsigned long long) offset, count);
index eb596c2ed546ca5555a2100426eebb67205d99b1..b1d39cabf125a7f90b7029f4355a0a7025af526d 100644 (file)
@@ -329,7 +329,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
        if (!iov_iter_count(data))
                return 0;
 
-       if (!(data->type & ITER_KVEC)) {
+       if (!iov_iter_is_kvec(data)) {
                int n;
                /*
                 * We allow only p9_max_pages pinned. We wait for the
index 4e2576fc0c59932cbb1f3c98c150764b91bb52c1..828e87fe802788d13f13f39efc68ce7558dfb245 100644 (file)
@@ -467,7 +467,7 @@ static int send_pkt(struct l2cap_chan *chan, struct sk_buff *skb,
        iv.iov_len = skb->len;
 
        memset(&msg, 0, sizeof(msg));
-       iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iv, 1, skb->len);
+       iov_iter_kvec(&msg.msg_iter, WRITE, &iv, 1, skb->len);
 
        err = l2cap_chan_send(chan, &msg, skb->len);
        if (err > 0) {
index 51c2cf2d8923ae8dcb174355f26b0b08634a3892..58fc6333d41225dcb967052eec798af839013e0b 100644 (file)
@@ -63,7 +63,7 @@ static void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *dat
 
        memset(&msg, 0, sizeof(msg));
 
-       iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iv, 1, total_len);
+       iov_iter_kvec(&msg.msg_iter, WRITE, &iv, 1, total_len);
 
        l2cap_chan_send(chan, &msg, total_len);
 
index a1c1b7e8a45ca6d6c44de507d7a5ff232d776edc..c822e626761bd0ecb51e1b1de1f8dfa5b98b3568 100644 (file)
@@ -622,7 +622,7 @@ static void smp_send_cmd(struct l2cap_conn *conn, u8 code, u16 len, void *data)
 
        memset(&msg, 0, sizeof(msg));
 
-       iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iv, 2, 1 + len);
+       iov_iter_kvec(&msg.msg_iter, WRITE, iv, 2, 1 + len);
 
        l2cap_chan_send(chan, &msg, 1 + len);
 
index 88e35830198cd3ed47b6655c6e31b783bddaf030..57fcc6b4bf6e1d6c33796868e5b816afcd3d235f 100644 (file)
@@ -513,7 +513,7 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
        if (!buf)
                msg.msg_flags |= MSG_TRUNC;
 
-       iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len);
+       iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, len);
        r = sock_recvmsg(sock, &msg, msg.msg_flags);
        if (r == -EAGAIN)
                r = 0;
@@ -532,7 +532,7 @@ static int ceph_tcp_recvpage(struct socket *sock, struct page *page,
        int r;
 
        BUG_ON(page_offset + length > PAGE_SIZE);
-       iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, &bvec, 1, length);
+       iov_iter_bvec(&msg.msg_iter, READ, &bvec, 1, length);
        r = sock_recvmsg(sock, &msg, msg.msg_flags);
        if (r == -EAGAIN)
                r = 0;
@@ -594,7 +594,7 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
        else
                msg.msg_flags |= MSG_EOR;  /* superfluous, but what the hell */
 
-       iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, &bvec, 1, size);
+       iov_iter_bvec(&msg.msg_iter, WRITE, &bvec, 1, size);
        ret = sock_sendmsg(sock, &msg);
        if (ret == -EAGAIN)
                ret = 0;
index 77d43ae2a7bbe1267f8430d5c35637d1984f463c..0ffcbdd55fa9ee545c807f2ed3fc178830e3075a 100644 (file)
@@ -3272,7 +3272,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *de
                }
 
                skb = next;
-               if (netif_xmit_stopped(txq) && skb) {
+               if (netif_tx_queue_stopped(txq) && skb) {
                        rc = NETDEV_TX_BUSY;
                        break;
                }
index 5da9552b186bc853904f7c85bbf872925463896c..2b9fdbc43205f3d8cf826b2074493aa5e72401fb 100644 (file)
@@ -717,7 +717,8 @@ int netpoll_setup(struct netpoll *np)
 
                                read_lock_bh(&idev->lock);
                                list_for_each_entry(ifp, &idev->addr_list, if_list) {
-                                       if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
+                                       if (!!(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) !=
+                                           !!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL))
                                                continue;
                                        np->local_ip.in6 = ifp->addr;
                                        err = 0;
index f679c7a7d761a60b22f733a443e77b54cb51595f..33d9227a8b8077a8cf6edbcaaa9f5b92d4fee48e 100644 (file)
@@ -3367,7 +3367,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
                        cb->seq = 0;
                }
                ret = dumpit(skb, cb);
-               if (ret < 0)
+               if (ret)
                        break;
        }
        cb->family = idx;
@@ -3600,6 +3600,11 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
                return -EINVAL;
        }
 
+       if (dev->type != ARPHRD_ETHER) {
+               NL_SET_ERR_MSG(extack, "FDB add only supported for Ethernet devices");
+               return -EINVAL;
+       }
+
        addr = nla_data(tb[NDA_LLADDR]);
 
        err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
@@ -3704,6 +3709,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
                return -EINVAL;
        }
 
+       if (dev->type != ARPHRD_ETHER) {
+               NL_SET_ERR_MSG(extack, "FDB delete only supported for Ethernet devices");
+               return -EINVAL;
+       }
+
        addr = nla_data(tb[NDA_LLADDR]);
 
        err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
index 946de0e24c876bbbe63de71b5c7cef91cb967708..b4ee5c8b928f07879b3c7ed08ed5e9b67b08fcb4 100644 (file)
@@ -4944,6 +4944,8 @@ static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
  *
  * This is a helper to do that correctly considering GSO_BY_FRAGS.
  *
+ * @skb: GSO skb
+ *
  * @seg_len: The segmented length (from skb_gso_*_seglen). In the
  *           GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS].
  *
index 6fcc4bc07d19bd929648f03b136225a69f2eddfc..080a880a1761b8e0efafaddf0ddac5bb87c64f88 100644 (file)
@@ -3279,6 +3279,7 @@ int sock_load_diag_module(int family, int protocol)
 
 #ifdef CONFIG_INET
        if (family == AF_INET &&
+           protocol != IPPROTO_RAW &&
            !rcu_access_pointer(inet_protos[protocol]))
                return -ENOENT;
 #endif
index 4da39446da2d89b529973eb33902577a0e6cbb54..765b2b32c4a4263640563f34b4dd93b5bdf471de 100644 (file)
 #ifdef CONFIG_IP_MULTICAST
 /* Parameter names and values are taken from igmp-v2-06 draft */
 
-#define IGMP_V1_ROUTER_PRESENT_TIMEOUT         (400*HZ)
-#define IGMP_V2_ROUTER_PRESENT_TIMEOUT         (400*HZ)
 #define IGMP_V2_UNSOLICITED_REPORT_INTERVAL    (10*HZ)
 #define IGMP_V3_UNSOLICITED_REPORT_INTERVAL    (1*HZ)
+#define IGMP_QUERY_INTERVAL                    (125*HZ)
 #define IGMP_QUERY_RESPONSE_INTERVAL           (10*HZ)
-#define IGMP_QUERY_ROBUSTNESS_VARIABLE         2
-
 
 #define IGMP_INITIAL_REPORT_DELAY              (1)
 
@@ -935,13 +932,15 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 
                        max_delay = IGMP_QUERY_RESPONSE_INTERVAL;
                        in_dev->mr_v1_seen = jiffies +
-                               IGMP_V1_ROUTER_PRESENT_TIMEOUT;
+                               (in_dev->mr_qrv * in_dev->mr_qi) +
+                               in_dev->mr_qri;
                        group = 0;
                } else {
                        /* v2 router present */
                        max_delay = ih->code*(HZ/IGMP_TIMER_SCALE);
                        in_dev->mr_v2_seen = jiffies +
-                               IGMP_V2_ROUTER_PRESENT_TIMEOUT;
+                               (in_dev->mr_qrv * in_dev->mr_qi) +
+                               in_dev->mr_qri;
                }
                /* cancel the interface change timer */
                in_dev->mr_ifc_count = 0;
@@ -981,8 +980,21 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
                if (!max_delay)
                        max_delay = 1;  /* can't mod w/ 0 */
                in_dev->mr_maxdelay = max_delay;
-               if (ih3->qrv)
-                       in_dev->mr_qrv = ih3->qrv;
+
+               /* RFC3376, 4.1.6. QRV and 4.1.7. QQIC, when the most recently
+                * received value was zero, use the default or statically
+                * configured value.
+                */
+               in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
+               in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
+
+               /* RFC3376, 8.3. Query Response Interval:
+                * The number of seconds represented by the [Query Response
+                * Interval] must be less than the [Query Interval].
+                */
+               if (in_dev->mr_qri >= in_dev->mr_qi)
+                       in_dev->mr_qri = (in_dev->mr_qi/HZ - 1)*HZ;
+
                if (!group) { /* general query */
                        if (ih3->nsrcs)
                                return true;    /* no sources allowed */
@@ -1723,18 +1735,30 @@ void ip_mc_down(struct in_device *in_dev)
        ip_mc_dec_group(in_dev, IGMP_ALL_HOSTS);
 }
 
-void ip_mc_init_dev(struct in_device *in_dev)
-{
 #ifdef CONFIG_IP_MULTICAST
+static void ip_mc_reset(struct in_device *in_dev)
+{
        struct net *net = dev_net(in_dev->dev);
+
+       in_dev->mr_qi = IGMP_QUERY_INTERVAL;
+       in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
+       in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
+}
+#else
+static void ip_mc_reset(struct in_device *in_dev)
+{
+}
 #endif
+
+void ip_mc_init_dev(struct in_device *in_dev)
+{
        ASSERT_RTNL();
 
 #ifdef CONFIG_IP_MULTICAST
        timer_setup(&in_dev->mr_gq_timer, igmp_gq_timer_expire, 0);
        timer_setup(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, 0);
-       in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
 #endif
+       ip_mc_reset(in_dev);
 
        spin_lock_init(&in_dev->mc_tomb_lock);
 }
@@ -1744,15 +1768,10 @@ void ip_mc_init_dev(struct in_device *in_dev)
 void ip_mc_up(struct in_device *in_dev)
 {
        struct ip_mc_list *pmc;
-#ifdef CONFIG_IP_MULTICAST
-       struct net *net = dev_net(in_dev->dev);
-#endif
 
        ASSERT_RTNL();
 
-#ifdef CONFIG_IP_MULTICAST
-       in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
-#endif
+       ip_mc_reset(in_dev);
        ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
 
        for_each_pmc_rtnl(in_dev, pmc) {
index 9b0158fa431f2245c0fa7e21d62e3ac01296dc20..d6ee343fdb8647ea96240d017b72aef2f6790299 100644 (file)
@@ -722,10 +722,14 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
        if (ip_is_fragment(&iph)) {
                skb = skb_share_check(skb, GFP_ATOMIC);
                if (skb) {
-                       if (!pskb_may_pull(skb, netoff + iph.ihl * 4))
-                               return skb;
-                       if (pskb_trim_rcsum(skb, netoff + len))
-                               return skb;
+                       if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) {
+                               kfree_skb(skb);
+                               return NULL;
+                       }
+                       if (pskb_trim_rcsum(skb, netoff + len)) {
+                               kfree_skb(skb);
+                               return NULL;
+                       }
                        memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
                        if (ip_defrag(net, skb, user))
                                return NULL;
index 26c36cccabdc2c8cc95cfd609672d412c493fc42..fffcc130900e518874027562272b1052cf0bdd16 100644 (file)
@@ -1246,7 +1246,7 @@ int ip_setsockopt(struct sock *sk, int level,
                return -ENOPROTOOPT;
 
        err = do_ip_setsockopt(sk, level, optname, optval, optlen);
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
        if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
            optname < BPFILTER_IPT_SET_MAX)
                err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
@@ -1559,7 +1559,7 @@ int ip_getsockopt(struct sock *sk, int level,
        int err;
 
        err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
        if (optname >= BPFILTER_IPT_SO_GET_INFO &&
            optname < BPFILTER_IPT_GET_MAX)
                err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
@@ -1596,7 +1596,7 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
        err = do_ip_getsockopt(sk, level, optname, optval, optlen,
                MSG_CMSG_COMPAT);
 
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
        if (optname >= BPFILTER_IPT_SO_GET_INFO &&
            optname < BPFILTER_IPT_GET_MAX)
                err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
index b7918d4caa300a15bec2858065b8f73d71cf6eb0..3b45fe530f91e2e1aa697888e11a78cf7e9d211e 100644 (file)
@@ -145,6 +145,7 @@ msg_bytes_ready:
                        ret = err;
                        goto out;
                }
+               copied = -EAGAIN;
        }
        ret = copied;
 out:
index 3f4d61017a6947c9dfb5cd1a38e5a25f1665928f..f0cd291034f0fa8ece55acd0fccf79e02629c98a 100644 (file)
@@ -1001,6 +1001,9 @@ static int __init inet6_init(void)
        err = ip6_flowlabel_init();
        if (err)
                goto ip6_flowlabel_fail;
+       err = ipv6_anycast_init();
+       if (err)
+               goto ipv6_anycast_fail;
        err = addrconf_init();
        if (err)
                goto addrconf_fail;
@@ -1091,6 +1094,8 @@ ipv6_frag_fail:
 ipv6_exthdrs_fail:
        addrconf_cleanup();
 addrconf_fail:
+       ipv6_anycast_cleanup();
+ipv6_anycast_fail:
        ip6_flowlabel_cleanup();
 ip6_flowlabel_fail:
        ndisc_late_cleanup();
index 4e0ff7031edd55ce6dbb3f2c62e22b9040cc7fec..94999058e11029b637b6ab8201f8706599e49284 100644 (file)
 
 #include <net/checksum.h>
 
+#define IN6_ADDR_HSIZE_SHIFT   8
+#define IN6_ADDR_HSIZE         BIT(IN6_ADDR_HSIZE_SHIFT)
+/*     anycast address hash table
+ */
+static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(acaddr_hash_lock);
+
 static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
 
+static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+       u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+       return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
 /*
  *     socket join an anycast group
  */
@@ -204,16 +218,39 @@ void ipv6_sock_ac_close(struct sock *sk)
        rtnl_unlock();
 }
 
+static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca)
+{
+       unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr);
+
+       spin_lock(&acaddr_hash_lock);
+       hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]);
+       spin_unlock(&acaddr_hash_lock);
+}
+
+static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca)
+{
+       spin_lock(&acaddr_hash_lock);
+       hlist_del_init_rcu(&aca->aca_addr_lst);
+       spin_unlock(&acaddr_hash_lock);
+}
+
 static void aca_get(struct ifacaddr6 *aca)
 {
        refcount_inc(&aca->aca_refcnt);
 }
 
+static void aca_free_rcu(struct rcu_head *h)
+{
+       struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu);
+
+       fib6_info_release(aca->aca_rt);
+       kfree(aca);
+}
+
 static void aca_put(struct ifacaddr6 *ac)
 {
        if (refcount_dec_and_test(&ac->aca_refcnt)) {
-               fib6_info_release(ac->aca_rt);
-               kfree(ac);
+               call_rcu(&ac->rcu, aca_free_rcu);
        }
 }
 
@@ -229,6 +266,7 @@ static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
        aca->aca_addr = *addr;
        fib6_info_hold(f6i);
        aca->aca_rt = f6i;
+       INIT_HLIST_NODE(&aca->aca_addr_lst);
        aca->aca_users = 1;
        /* aca_tstamp should be updated upon changes */
        aca->aca_cstamp = aca->aca_tstamp = jiffies;
@@ -285,6 +323,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
        aca_get(aca);
        write_unlock_bh(&idev->lock);
 
+       ipv6_add_acaddr_hash(net, aca);
+
        ip6_ins_rt(net, f6i);
 
        addrconf_join_solict(idev->dev, &aca->aca_addr);
@@ -325,6 +365,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
        else
                idev->ac_list = aca->aca_next;
        write_unlock_bh(&idev->lock);
+       ipv6_del_acaddr_hash(aca);
        addrconf_leave_solict(idev, &aca->aca_addr);
 
        ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
@@ -352,6 +393,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
                idev->ac_list = aca->aca_next;
                write_unlock_bh(&idev->lock);
 
+               ipv6_del_acaddr_hash(aca);
+
                addrconf_leave_solict(idev, &aca->aca_addr);
 
                ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
@@ -390,17 +433,25 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad
 bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
                         const struct in6_addr *addr)
 {
+       unsigned int hash = inet6_acaddr_hash(net, addr);
+       struct net_device *nh_dev;
+       struct ifacaddr6 *aca;
        bool found = false;
 
        rcu_read_lock();
        if (dev)
                found = ipv6_chk_acast_dev(dev, addr);
        else
-               for_each_netdev_rcu(net, dev)
-                       if (ipv6_chk_acast_dev(dev, addr)) {
+               hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash],
+                                        aca_addr_lst) {
+                       nh_dev = fib6_info_nh_dev(aca->aca_rt);
+                       if (!nh_dev || !net_eq(dev_net(nh_dev), net))
+                               continue;
+                       if (ipv6_addr_equal(&aca->aca_addr, addr)) {
                                found = true;
                                break;
                        }
+               }
        rcu_read_unlock();
        return found;
 }
@@ -540,3 +591,24 @@ void ac6_proc_exit(struct net *net)
        remove_proc_entry("anycast6", net->proc_net);
 }
 #endif
+
+/*     Init / cleanup code
+ */
+int __init ipv6_anycast_init(void)
+{
+       int i;
+
+       for (i = 0; i < IN6_ADDR_HSIZE; i++)
+               INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
+       return 0;
+}
+
+void ipv6_anycast_cleanup(void)
+{
+       int i;
+
+       spin_lock(&acaddr_hash_lock);
+       for (i = 0; i < IN6_ADDR_HSIZE; i++)
+               WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
+       spin_unlock(&acaddr_hash_lock);
+}
index 1b8bc008b53b642adef3ba9335563d430a99c1a9..ae3786132c236b2bcde4f8f3008fceb2d6bc1cdd 100644 (file)
@@ -591,7 +591,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 
        /* fib entries are never clones */
        if (arg.filter.flags & RTM_F_CLONED)
-               return skb->len;
+               goto out;
 
        w = (void *)cb->args[2];
        if (!w) {
@@ -621,7 +621,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
                tb = fib6_get_table(net, arg.filter.table_id);
                if (!tb) {
                        if (arg.filter.dump_all_families)
-                               return skb->len;
+                               goto out;
 
                        NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
                        return -ENOENT;
index b8ac369f98ad877f6cf9114b1dbcfcb6c4c95ec5..d219979c3e529c32e029865debc788109d05ad83 100644 (file)
@@ -587,11 +587,16 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
         */
        ret = -EINPROGRESS;
        if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
-           fq->q.meat == fq->q.len &&
-           nf_ct_frag6_reasm(fq, skb, dev))
-               ret = 0;
-       else
+           fq->q.meat == fq->q.len) {
+               unsigned long orefdst = skb->_skb_refdst;
+
+               skb->_skb_refdst = 0UL;
+               if (nf_ct_frag6_reasm(fq, skb, dev))
+                       ret = 0;
+               skb->_skb_refdst = orefdst;
+       } else {
                skb_dst_drop(skb);
+       }
 
 out_unlock:
        spin_unlock_bh(&fq->q.lock);
index bc4bd247bb7d42767eb860c05fb4b0b40408304b..1577f2f76060dcd816f94078412f52943568ce40 100644 (file)
@@ -55,11 +55,15 @@ MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
 MODULE_DESCRIPTION("core IP set support");
 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
 
-/* When the nfnl mutex is held: */
+/* When the nfnl mutex or ip_set_ref_lock is held: */
 #define ip_set_dereference(p)          \
-       rcu_dereference_protected(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
+       rcu_dereference_protected(p,    \
+               lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \
+               lockdep_is_held(&ip_set_ref_lock))
 #define ip_set(inst, id)               \
        ip_set_dereference((inst)->ip_set_list)[id]
+#define ip_set_ref_netlink(inst,id)    \
+       rcu_dereference_raw((inst)->ip_set_list)[id]
 
 /* The set types are implemented in modules and registered set types
  * can be found in ip_set_type_list. Adding/deleting types is
@@ -693,21 +697,20 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index)
 EXPORT_SYMBOL_GPL(ip_set_put_byindex);
 
 /* Get the name of a set behind a set index.
- * We assume the set is referenced, so it does exist and
- * can't be destroyed. The set cannot be renamed due to
- * the referencing either.
- *
+ * Set itself is protected by RCU, but its name isn't: to protect against
+ * renaming, grab ip_set_ref_lock as reader (see ip_set_rename()) and copy the
+ * name.
  */
-const char *
-ip_set_name_byindex(struct net *net, ip_set_id_t index)
+void
+ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
 {
-       const struct ip_set *set = ip_set_rcu_get(net, index);
+       struct ip_set *set = ip_set_rcu_get(net, index);
 
        BUG_ON(!set);
-       BUG_ON(set->ref == 0);
 
-       /* Referenced, so it's safe */
-       return set->name;
+       read_lock_bh(&ip_set_ref_lock);
+       strncpy(name, set->name, IPSET_MAXNAMELEN);
+       read_unlock_bh(&ip_set_ref_lock);
 }
 EXPORT_SYMBOL_GPL(ip_set_name_byindex);
 
@@ -961,7 +964,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl,
                        /* Wraparound */
                        goto cleanup;
 
-               list = kcalloc(i, sizeof(struct ip_set *), GFP_KERNEL);
+               list = kvcalloc(i, sizeof(struct ip_set *), GFP_KERNEL);
                if (!list)
                        goto cleanup;
                /* nfnl mutex is held, both lists are valid */
@@ -973,7 +976,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl,
                /* Use new list */
                index = inst->ip_set_max;
                inst->ip_set_max = i;
-               kfree(tmp);
+               kvfree(tmp);
                ret = 0;
        } else if (ret) {
                goto cleanup;
@@ -1153,7 +1156,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
        if (!set)
                return -ENOENT;
 
-       read_lock_bh(&ip_set_ref_lock);
+       write_lock_bh(&ip_set_ref_lock);
        if (set->ref != 0) {
                ret = -IPSET_ERR_REFERENCED;
                goto out;
@@ -1170,7 +1173,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
        strncpy(set->name, name2, IPSET_MAXNAMELEN);
 
 out:
-       read_unlock_bh(&ip_set_ref_lock);
+       write_unlock_bh(&ip_set_ref_lock);
        return ret;
 }
 
@@ -1252,7 +1255,7 @@ ip_set_dump_done(struct netlink_callback *cb)
                struct ip_set_net *inst =
                        (struct ip_set_net *)cb->args[IPSET_CB_NET];
                ip_set_id_t index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
-               struct ip_set *set = ip_set(inst, index);
+               struct ip_set *set = ip_set_ref_netlink(inst, index);
 
                if (set->variant->uref)
                        set->variant->uref(set, cb, false);
@@ -1441,7 +1444,7 @@ next_set:
 release_refcount:
        /* If there was an error or set is done, release set */
        if (ret || !cb->args[IPSET_CB_ARG0]) {
-               set = ip_set(inst, index);
+               set = ip_set_ref_netlink(inst, index);
                if (set->variant->uref)
                        set->variant->uref(set, cb, false);
                pr_debug("release set %s\n", set->name);
@@ -2059,7 +2062,7 @@ ip_set_net_init(struct net *net)
        if (inst->ip_set_max >= IPSET_INVALID_ID)
                inst->ip_set_max = IPSET_INVALID_ID - 1;
 
-       list = kcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL);
+       list = kvcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL);
        if (!list)
                return -ENOMEM;
        inst->is_deleted = false;
@@ -2087,7 +2090,7 @@ ip_set_net_exit(struct net *net)
                }
        }
        nfnl_unlock(NFNL_SUBSYS_IPSET);
-       kfree(rcu_dereference_protected(inst->ip_set_list, 1));
+       kvfree(rcu_dereference_protected(inst->ip_set_list, 1));
 }
 
 static struct pernet_operations ip_set_net_ops = {
index d391485a6acdc2ff3523d5b7d39c20ab4a8add80..613e18e720a44777754428666b9f021de952de9a 100644 (file)
@@ -213,13 +213,13 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 
        if (tb[IPSET_ATTR_CIDR]) {
                e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
-               if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
+               if (e.cidr[0] > HOST_MASK)
                        return -IPSET_ERR_INVALID_CIDR;
        }
 
        if (tb[IPSET_ATTR_CIDR2]) {
                e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
-               if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
+               if (e.cidr[1] > HOST_MASK)
                        return -IPSET_ERR_INVALID_CIDR;
        }
 
@@ -493,13 +493,13 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 
        if (tb[IPSET_ATTR_CIDR]) {
                e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
-               if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
+               if (e.cidr[0] > HOST_MASK)
                        return -IPSET_ERR_INVALID_CIDR;
        }
 
        if (tb[IPSET_ATTR_CIDR2]) {
                e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
-               if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
+               if (e.cidr[1] > HOST_MASK)
                        return -IPSET_ERR_INVALID_CIDR;
        }
 
index 072a658fde047c5d9d59ac08e796b25759cc68a1..4eef55da0878e299d0bb912fa7ea69d3d4e91441 100644 (file)
@@ -148,9 +148,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
 {
        struct set_elem *e = container_of(rcu, struct set_elem, rcu);
        struct ip_set *set = e->set;
-       struct list_set *map = set->data;
 
-       ip_set_put_byindex(map->net, e->id);
        ip_set_ext_destroy(set, e);
        kfree(e);
 }
@@ -158,15 +156,21 @@ __list_set_del_rcu(struct rcu_head * rcu)
 static inline void
 list_set_del(struct ip_set *set, struct set_elem *e)
 {
+       struct list_set *map = set->data;
+
        set->elements--;
        list_del_rcu(&e->list);
+       ip_set_put_byindex(map->net, e->id);
        call_rcu(&e->rcu, __list_set_del_rcu);
 }
 
 static inline void
-list_set_replace(struct set_elem *e, struct set_elem *old)
+list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
 {
+       struct list_set *map = set->data;
+
        list_replace_rcu(&old->list, &e->list);
+       ip_set_put_byindex(map->net, old->id);
        call_rcu(&old->rcu, __list_set_del_rcu);
 }
 
@@ -298,7 +302,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
        INIT_LIST_HEAD(&e->list);
        list_set_init_extensions(set, ext, e);
        if (n)
-               list_set_replace(e, n);
+               list_set_replace(set, e, n);
        else if (next)
                list_add_tail_rcu(&e->list, &next->list);
        else if (prev)
@@ -486,6 +490,7 @@ list_set_list(const struct ip_set *set,
        const struct list_set *map = set->data;
        struct nlattr *atd, *nested;
        u32 i = 0, first = cb->args[IPSET_CB_ARG0];
+       char name[IPSET_MAXNAMELEN];
        struct set_elem *e;
        int ret = 0;
 
@@ -504,8 +509,8 @@ list_set_list(const struct ip_set *set,
                nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
                if (!nested)
                        goto nla_put_failure;
-               if (nla_put_string(skb, IPSET_ATTR_NAME,
-                                  ip_set_name_byindex(map->net, e->id)))
+               ip_set_name_byindex(map->net, e->id, name);
+               if (nla_put_string(skb, IPSET_ATTR_NAME, name))
                        goto nla_put_failure;
                if (ip_set_put_extensions(skb, set, e, true))
                        goto nla_put_failure;
index d4020c5e831d3020a6e412ead6d1895f81b5a124..2526be6b3d9095abea1ee68bda4e4f3bf608be55 100644 (file)
@@ -1616,7 +1616,7 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
        EnterFunction(7);
 
        /* Receive a packet */
-       iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, buflen);
+       iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, buflen);
        len = sock_recvmsg(sock, &msg, MSG_DONTWAIT);
        if (len < 0)
                return len;
index ca1168d67fac6c0fc1eaef5dfeb1db8428e51db3..e92e749aff53e46c60718b55593e72d70838e9be 100644 (file)
@@ -1073,19 +1073,22 @@ static unsigned int early_drop_list(struct net *net,
        return drops;
 }
 
-static noinline int early_drop(struct net *net, unsigned int _hash)
+static noinline int early_drop(struct net *net, unsigned int hash)
 {
-       unsigned int i;
+       unsigned int i, bucket;
 
        for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
                struct hlist_nulls_head *ct_hash;
-               unsigned int hash, hsize, drops;
+               unsigned int hsize, drops;
 
                rcu_read_lock();
                nf_conntrack_get_ht(&ct_hash, &hsize);
-               hash = reciprocal_scale(_hash++, hsize);
+               if (!i)
+                       bucket = reciprocal_scale(hash, hsize);
+               else
+                       bucket = (bucket + 1) % hsize;
 
-               drops = early_drop_list(net, &ct_hash[hash]);
+               drops = early_drop_list(net, &ct_hash[bucket]);
                rcu_read_unlock();
 
                if (drops) {
index 171e9e122e5f1e8b8840e41013d86246ba8025b9..023c1445bc3960de8c3d2350d9fb5c8d743e920f 100644 (file)
@@ -384,11 +384,6 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
        },
 };
 
-static inline struct nf_dccp_net *dccp_pernet(struct net *net)
-{
-       return &net->ct.nf_ct_proto.dccp;
-}
-
 static noinline bool
 dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
         const struct dccp_hdr *dh)
@@ -401,7 +396,7 @@ dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
        state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE];
        switch (state) {
        default:
-               dn = dccp_pernet(net);
+               dn = nf_dccp_pernet(net);
                if (dn->dccp_loose == 0) {
                        msg = "not picking up existing connection ";
                        goto out_invalid;
@@ -568,7 +563,7 @@ static int dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
 
        timeouts = nf_ct_timeout_lookup(ct);
        if (!timeouts)
-               timeouts = dccp_pernet(nf_ct_net(ct))->dccp_timeout;
+               timeouts = nf_dccp_pernet(nf_ct_net(ct))->dccp_timeout;
        nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
 
        return NF_ACCEPT;
@@ -681,7 +676,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
 static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[],
                                      struct net *net, void *data)
 {
-       struct nf_dccp_net *dn = dccp_pernet(net);
+       struct nf_dccp_net *dn = nf_dccp_pernet(net);
        unsigned int *timeouts = data;
        int i;
 
@@ -814,7 +809,7 @@ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
 
 static int dccp_init_net(struct net *net)
 {
-       struct nf_dccp_net *dn = dccp_pernet(net);
+       struct nf_dccp_net *dn = nf_dccp_pernet(net);
        struct nf_proto_net *pn = &dn->pn;
 
        if (!pn->users) {
index e10e867e0b55f3203e8a50d4ac7c884201ac1186..5da19d5fbc767f2ca8f22ac4eba09aebb6c59fda 100644 (file)
@@ -27,11 +27,6 @@ static bool nf_generic_should_process(u8 proto)
        }
 }
 
-static inline struct nf_generic_net *generic_pernet(struct net *net)
-{
-       return &net->ct.nf_ct_proto.generic;
-}
-
 static bool generic_pkt_to_tuple(const struct sk_buff *skb,
                                 unsigned int dataoff,
                                 struct net *net, struct nf_conntrack_tuple *tuple)
@@ -58,7 +53,7 @@ static int generic_packet(struct nf_conn *ct,
        }
 
        if (!timeout)
-               timeout = &generic_pernet(nf_ct_net(ct))->timeout;
+               timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout;
 
        nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
        return NF_ACCEPT;
@@ -72,7 +67,7 @@ static int generic_packet(struct nf_conn *ct,
 static int generic_timeout_nlattr_to_obj(struct nlattr *tb[],
                                         struct net *net, void *data)
 {
-       struct nf_generic_net *gn = generic_pernet(net);
+       struct nf_generic_net *gn = nf_generic_pernet(net);
        unsigned int *timeout = data;
 
        if (!timeout)
@@ -138,7 +133,7 @@ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
 
 static int generic_init_net(struct net *net)
 {
-       struct nf_generic_net *gn = generic_pernet(net);
+       struct nf_generic_net *gn = nf_generic_pernet(net);
        struct nf_proto_net *pn = &gn->pn;
 
        gn->timeout = nf_ct_generic_timeout;
index 3598520bd19b7b76dbd91bb42e4b8b91713abf2c..de64d8a5fdfd137aca48a9e62a143a01f63bec07 100644 (file)
 
 static const unsigned int nf_ct_icmp_timeout = 30*HZ;
 
-static inline struct nf_icmp_net *icmp_pernet(struct net *net)
-{
-       return &net->ct.nf_ct_proto.icmp;
-}
-
 static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
                              struct net *net, struct nf_conntrack_tuple *tuple)
 {
@@ -103,7 +98,7 @@ static int icmp_packet(struct nf_conn *ct,
        }
 
        if (!timeout)
-               timeout = &icmp_pernet(nf_ct_net(ct))->timeout;
+               timeout = &nf_icmp_pernet(nf_ct_net(ct))->timeout;
 
        nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
        return NF_ACCEPT;
@@ -275,7 +270,7 @@ static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
                                      struct net *net, void *data)
 {
        unsigned int *timeout = data;
-       struct nf_icmp_net *in = icmp_pernet(net);
+       struct nf_icmp_net *in = nf_icmp_pernet(net);
 
        if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
                if (!timeout)
@@ -337,7 +332,7 @@ static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
 
 static int icmp_init_net(struct net *net)
 {
-       struct nf_icmp_net *in = icmp_pernet(net);
+       struct nf_icmp_net *in = nf_icmp_pernet(net);
        struct nf_proto_net *pn = &in->pn;
 
        in->timeout = nf_ct_icmp_timeout;
index 378618feed5da7df50e09c8ec4f72618953306b0..a15eefb8e3173c5d89268bd7f2a6c076ff787b1c 100644 (file)
 
 static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
 
-static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
-{
-       return &net->ct.nf_ct_proto.icmpv6;
-}
-
 static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
                                unsigned int dataoff,
                                struct net *net,
@@ -87,7 +82,7 @@ static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
 
 static unsigned int *icmpv6_get_timeouts(struct net *net)
 {
-       return &icmpv6_pernet(net)->timeout;
+       return &nf_icmpv6_pernet(net)->timeout;
 }
 
 /* Returns verdict for packet, or -1 for invalid. */
@@ -286,7 +281,7 @@ static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
                                        struct net *net, void *data)
 {
        unsigned int *timeout = data;
-       struct nf_icmp_net *in = icmpv6_pernet(net);
+       struct nf_icmp_net *in = nf_icmpv6_pernet(net);
 
        if (!timeout)
                timeout = icmpv6_get_timeouts(net);
@@ -348,7 +343,7 @@ static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
 
 static int icmpv6_init_net(struct net *net)
 {
-       struct nf_icmp_net *in = icmpv6_pernet(net);
+       struct nf_icmp_net *in = nf_icmpv6_pernet(net);
        struct nf_proto_net *pn = &in->pn;
 
        in->timeout = nf_ct_icmpv6_timeout;
index 3d719d3eb9a38c7709b8d224facdad8820ebded4..d53e3e78f6052a1f8d8fde973ee03b0763470b30 100644 (file)
@@ -146,11 +146,6 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
        }
 };
 
-static inline struct nf_sctp_net *sctp_pernet(struct net *net)
-{
-       return &net->ct.nf_ct_proto.sctp;
-}
-
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* Print out the private part of the conntrack. */
 static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@@ -480,7 +475,7 @@ static int sctp_packet(struct nf_conn *ct,
 
        timeouts = nf_ct_timeout_lookup(ct);
        if (!timeouts)
-               timeouts = sctp_pernet(nf_ct_net(ct))->timeouts;
+               timeouts = nf_sctp_pernet(nf_ct_net(ct))->timeouts;
 
        nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
 
@@ -599,7 +594,7 @@ static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[],
                                      struct net *net, void *data)
 {
        unsigned int *timeouts = data;
-       struct nf_sctp_net *sn = sctp_pernet(net);
+       struct nf_sctp_net *sn = nf_sctp_pernet(net);
        int i;
 
        /* set default SCTP timeouts. */
@@ -736,7 +731,7 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
 
 static int sctp_init_net(struct net *net)
 {
-       struct nf_sctp_net *sn = sctp_pernet(net);
+       struct nf_sctp_net *sn = nf_sctp_pernet(net);
        struct nf_proto_net *pn = &sn->pn;
 
        if (!pn->users) {
index 1bcf9984d45e8601646cb2b99dc5f3113a5c8b0a..4dcbd51a8e97f04ad8056374ed892887d2f0798e 100644 (file)
@@ -272,11 +272,6 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
        }
 };
 
-static inline struct nf_tcp_net *tcp_pernet(struct net *net)
-{
-       return &net->ct.nf_ct_proto.tcp;
-}
-
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* Print out the private part of the conntrack. */
 static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@@ -475,7 +470,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
                          const struct tcphdr *tcph)
 {
        struct net *net = nf_ct_net(ct);
-       struct nf_tcp_net *tn = tcp_pernet(net);
+       struct nf_tcp_net *tn = nf_tcp_pernet(net);
        struct ip_ct_tcp_state *sender = &state->seen[dir];
        struct ip_ct_tcp_state *receiver = &state->seen[!dir];
        const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
@@ -767,7 +762,7 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
 {
        enum tcp_conntrack new_state;
        struct net *net = nf_ct_net(ct);
-       const struct nf_tcp_net *tn = tcp_pernet(net);
+       const struct nf_tcp_net *tn = nf_tcp_pernet(net);
        const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
        const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
 
@@ -841,7 +836,7 @@ static int tcp_packet(struct nf_conn *ct,
                      const struct nf_hook_state *state)
 {
        struct net *net = nf_ct_net(ct);
-       struct nf_tcp_net *tn = tcp_pernet(net);
+       struct nf_tcp_net *tn = nf_tcp_pernet(net);
        struct nf_conntrack_tuple *tuple;
        enum tcp_conntrack new_state, old_state;
        unsigned int index, *timeouts;
@@ -1283,7 +1278,7 @@ static unsigned int tcp_nlattr_tuple_size(void)
 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
                                     struct net *net, void *data)
 {
-       struct nf_tcp_net *tn = tcp_pernet(net);
+       struct nf_tcp_net *tn = nf_tcp_pernet(net);
        unsigned int *timeouts = data;
        int i;
 
@@ -1508,7 +1503,7 @@ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
 
 static int tcp_init_net(struct net *net)
 {
-       struct nf_tcp_net *tn = tcp_pernet(net);
+       struct nf_tcp_net *tn = nf_tcp_pernet(net);
        struct nf_proto_net *pn = &tn->pn;
 
        if (!pn->users) {
index a7aa70370913ce7e8914343270152fb009eb2a63..c879d8d78cfde88a223b961bb203bf7bb48ef1b2 100644 (file)
@@ -32,14 +32,9 @@ static const unsigned int udp_timeouts[UDP_CT_MAX] = {
        [UDP_CT_REPLIED]        = 180*HZ,
 };
 
-static inline struct nf_udp_net *udp_pernet(struct net *net)
-{
-       return &net->ct.nf_ct_proto.udp;
-}
-
 static unsigned int *udp_get_timeouts(struct net *net)
 {
-       return udp_pernet(net)->timeouts;
+       return nf_udp_pernet(net)->timeouts;
 }
 
 static void udp_error_log(const struct sk_buff *skb,
@@ -212,7 +207,7 @@ static int udp_timeout_nlattr_to_obj(struct nlattr *tb[],
                                     struct net *net, void *data)
 {
        unsigned int *timeouts = data;
-       struct nf_udp_net *un = udp_pernet(net);
+       struct nf_udp_net *un = nf_udp_pernet(net);
 
        if (!timeouts)
                timeouts = un->timeouts;
@@ -292,7 +287,7 @@ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
 
 static int udp_init_net(struct net *net)
 {
-       struct nf_udp_net *un = udp_pernet(net);
+       struct nf_udp_net *un = nf_udp_pernet(net);
        struct nf_proto_net *pn = &un->pn;
 
        if (!pn->users) {
index e7a50af1b3d61a6e12fb74eaa9a9ba02f0a8d22b..a518eb162344e6692e69989cf5ba0cdf03da1333 100644 (file)
@@ -382,7 +382,8 @@ err:
 static int
 cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
                            u32 seq, u32 type, int event, u16 l3num,
-                           const struct nf_conntrack_l4proto *l4proto)
+                           const struct nf_conntrack_l4proto *l4proto,
+                           const unsigned int *timeouts)
 {
        struct nlmsghdr *nlh;
        struct nfgenmsg *nfmsg;
@@ -408,7 +409,7 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
        if (!nest_parms)
                goto nla_put_failure;
 
-       ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL);
+       ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts);
        if (ret < 0)
                goto nla_put_failure;
 
@@ -430,6 +431,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
                                 struct netlink_ext_ack *extack)
 {
        const struct nf_conntrack_l4proto *l4proto;
+       unsigned int *timeouts = NULL;
        struct sk_buff *skb2;
        int ret, err;
        __u16 l3num;
@@ -442,12 +444,44 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
        l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
        l4proto = nf_ct_l4proto_find_get(l4num);
 
-       /* This protocol is not supported, skip. */
-       if (l4proto->l4proto != l4num) {
-               err = -EOPNOTSUPP;
+       err = -EOPNOTSUPP;
+       if (l4proto->l4proto != l4num)
                goto err;
+
+       switch (l4proto->l4proto) {
+       case IPPROTO_ICMP:
+               timeouts = &nf_icmp_pernet(net)->timeout;
+               break;
+       case IPPROTO_TCP:
+               timeouts = nf_tcp_pernet(net)->timeouts;
+               break;
+       case IPPROTO_UDP:
+               timeouts = nf_udp_pernet(net)->timeouts;
+               break;
+       case IPPROTO_DCCP:
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+               timeouts = nf_dccp_pernet(net)->dccp_timeout;
+#endif
+               break;
+       case IPPROTO_ICMPV6:
+               timeouts = &nf_icmpv6_pernet(net)->timeout;
+               break;
+       case IPPROTO_SCTP:
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+               timeouts = nf_sctp_pernet(net)->timeouts;
+#endif
+               break;
+       case 255:
+               timeouts = &nf_generic_pernet(net)->timeout;
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               break;
        }
 
+       if (!timeouts)
+               goto err;
+
        skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (skb2 == NULL) {
                err = -ENOMEM;
@@ -458,8 +492,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
                                          nlh->nlmsg_seq,
                                          NFNL_MSG_TYPE(nlh->nlmsg_type),
                                          IPCTNL_MSG_TIMEOUT_DEFAULT_SET,
-                                         l3num,
-                                         l4proto);
+                                         l3num, l4proto, timeouts);
        if (ret <= 0) {
                kfree_skb(skb2);
                err = -ENOMEM;
index 768292eac2a46afe84df3b8a949a70bf77baf478..9d0ede4742240f544bbdff40553b73b8e755c7b4 100644 (file)
@@ -54,9 +54,11 @@ static bool nft_xt_put(struct nft_xt *xt)
        return false;
 }
 
-static int nft_compat_chain_validate_dependency(const char *tablename,
-                                               const struct nft_chain *chain)
+static int nft_compat_chain_validate_dependency(const struct nft_ctx *ctx,
+                                               const char *tablename)
 {
+       enum nft_chain_types type = NFT_CHAIN_T_DEFAULT;
+       const struct nft_chain *chain = ctx->chain;
        const struct nft_base_chain *basechain;
 
        if (!tablename ||
@@ -64,9 +66,12 @@ static int nft_compat_chain_validate_dependency(const char *tablename,
                return 0;
 
        basechain = nft_base_chain(chain);
-       if (strcmp(tablename, "nat") == 0 &&
-           basechain->type->type != NFT_CHAIN_T_NAT)
-               return -EINVAL;
+       if (strcmp(tablename, "nat") == 0) {
+               if (ctx->family != NFPROTO_BRIDGE)
+                       type = NFT_CHAIN_T_NAT;
+               if (basechain->type->type != type)
+                       return -EINVAL;
+       }
 
        return 0;
 }
@@ -342,8 +347,7 @@ static int nft_target_validate(const struct nft_ctx *ctx,
                if (target->hooks && !(hook_mask & target->hooks))
                        return -EINVAL;
 
-               ret = nft_compat_chain_validate_dependency(target->table,
-                                                          ctx->chain);
+               ret = nft_compat_chain_validate_dependency(ctx, target->table);
                if (ret < 0)
                        return ret;
        }
@@ -590,8 +594,7 @@ static int nft_match_validate(const struct nft_ctx *ctx,
                if (match->hooks && !(hook_mask & match->hooks))
                        return -EINVAL;
 
-               ret = nft_compat_chain_validate_dependency(match->table,
-                                                          ctx->chain);
+               ret = nft_compat_chain_validate_dependency(ctx, match->table);
                if (ret < 0)
                        return ret;
        }
index 649d1700ec5ba026307c46596112b6b3fb667255..3cc1b3dc3c3cdb2508cef7825f3bd9c485679fdb 100644 (file)
@@ -24,7 +24,6 @@ struct nft_ng_inc {
        u32                     modulus;
        atomic_t                counter;
        u32                     offset;
-       struct nft_set          *map;
 };
 
 static u32 nft_ng_inc_gen(struct nft_ng_inc *priv)
@@ -48,34 +47,11 @@ static void nft_ng_inc_eval(const struct nft_expr *expr,
        regs->data[priv->dreg] = nft_ng_inc_gen(priv);
 }
 
-static void nft_ng_inc_map_eval(const struct nft_expr *expr,
-                               struct nft_regs *regs,
-                               const struct nft_pktinfo *pkt)
-{
-       struct nft_ng_inc *priv = nft_expr_priv(expr);
-       const struct nft_set *map = priv->map;
-       const struct nft_set_ext *ext;
-       u32 result;
-       bool found;
-
-       result = nft_ng_inc_gen(priv);
-       found = map->ops->lookup(nft_net(pkt), map, &result, &ext);
-
-       if (!found)
-               return;
-
-       nft_data_copy(&regs->data[priv->dreg],
-                     nft_set_ext_data(ext), map->dlen);
-}
-
 static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
        [NFTA_NG_DREG]          = { .type = NLA_U32 },
        [NFTA_NG_MODULUS]       = { .type = NLA_U32 },
        [NFTA_NG_TYPE]          = { .type = NLA_U32 },
        [NFTA_NG_OFFSET]        = { .type = NLA_U32 },
-       [NFTA_NG_SET_NAME]      = { .type = NLA_STRING,
-                                   .len = NFT_SET_MAXNAMELEN - 1 },
-       [NFTA_NG_SET_ID]        = { .type = NLA_U32 },
 };
 
 static int nft_ng_inc_init(const struct nft_ctx *ctx,
@@ -101,22 +77,6 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
                                           NFT_DATA_VALUE, sizeof(u32));
 }
 
-static int nft_ng_inc_map_init(const struct nft_ctx *ctx,
-                              const struct nft_expr *expr,
-                              const struct nlattr * const tb[])
-{
-       struct nft_ng_inc *priv = nft_expr_priv(expr);
-       u8 genmask = nft_genmask_next(ctx->net);
-
-       nft_ng_inc_init(ctx, expr, tb);
-
-       priv->map = nft_set_lookup_global(ctx->net, ctx->table,
-                                         tb[NFTA_NG_SET_NAME],
-                                         tb[NFTA_NG_SET_ID], genmask);
-
-       return PTR_ERR_OR_ZERO(priv->map);
-}
-
 static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
                       u32 modulus, enum nft_ng_types type, u32 offset)
 {
@@ -143,27 +103,10 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
                           priv->offset);
 }
 
-static int nft_ng_inc_map_dump(struct sk_buff *skb,
-                              const struct nft_expr *expr)
-{
-       const struct nft_ng_inc *priv = nft_expr_priv(expr);
-
-       if (nft_ng_dump(skb, priv->dreg, priv->modulus,
-                       NFT_NG_INCREMENTAL, priv->offset) ||
-           nla_put_string(skb, NFTA_NG_SET_NAME, priv->map->name))
-               goto nla_put_failure;
-
-       return 0;
-
-nla_put_failure:
-       return -1;
-}
-
 struct nft_ng_random {
        enum nft_registers      dreg:8;
        u32                     modulus;
        u32                     offset;
-       struct nft_set          *map;
 };
 
 static u32 nft_ng_random_gen(struct nft_ng_random *priv)
@@ -183,25 +126,6 @@ static void nft_ng_random_eval(const struct nft_expr *expr,
        regs->data[priv->dreg] = nft_ng_random_gen(priv);
 }
 
-static void nft_ng_random_map_eval(const struct nft_expr *expr,
-                                  struct nft_regs *regs,
-                                  const struct nft_pktinfo *pkt)
-{
-       struct nft_ng_random *priv = nft_expr_priv(expr);
-       const struct nft_set *map = priv->map;
-       const struct nft_set_ext *ext;
-       u32 result;
-       bool found;
-
-       result = nft_ng_random_gen(priv);
-       found = map->ops->lookup(nft_net(pkt), map, &result, &ext);
-       if (!found)
-               return;
-
-       nft_data_copy(&regs->data[priv->dreg],
-                     nft_set_ext_data(ext), map->dlen);
-}
-
 static int nft_ng_random_init(const struct nft_ctx *ctx,
                              const struct nft_expr *expr,
                              const struct nlattr * const tb[])
@@ -226,21 +150,6 @@ static int nft_ng_random_init(const struct nft_ctx *ctx,
                                           NFT_DATA_VALUE, sizeof(u32));
 }
 
-static int nft_ng_random_map_init(const struct nft_ctx *ctx,
-                                 const struct nft_expr *expr,
-                                 const struct nlattr * const tb[])
-{
-       struct nft_ng_random *priv = nft_expr_priv(expr);
-       u8 genmask = nft_genmask_next(ctx->net);
-
-       nft_ng_random_init(ctx, expr, tb);
-       priv->map = nft_set_lookup_global(ctx->net, ctx->table,
-                                         tb[NFTA_NG_SET_NAME],
-                                         tb[NFTA_NG_SET_ID], genmask);
-
-       return PTR_ERR_OR_ZERO(priv->map);
-}
-
 static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
        const struct nft_ng_random *priv = nft_expr_priv(expr);
@@ -249,22 +158,6 @@ static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
                           priv->offset);
 }
 
-static int nft_ng_random_map_dump(struct sk_buff *skb,
-                                 const struct nft_expr *expr)
-{
-       const struct nft_ng_random *priv = nft_expr_priv(expr);
-
-       if (nft_ng_dump(skb, priv->dreg, priv->modulus,
-                       NFT_NG_RANDOM, priv->offset) ||
-           nla_put_string(skb, NFTA_NG_SET_NAME, priv->map->name))
-               goto nla_put_failure;
-
-       return 0;
-
-nla_put_failure:
-       return -1;
-}
-
 static struct nft_expr_type nft_ng_type;
 static const struct nft_expr_ops nft_ng_inc_ops = {
        .type           = &nft_ng_type,
@@ -274,14 +167,6 @@ static const struct nft_expr_ops nft_ng_inc_ops = {
        .dump           = nft_ng_inc_dump,
 };
 
-static const struct nft_expr_ops nft_ng_inc_map_ops = {
-       .type           = &nft_ng_type,
-       .size           = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)),
-       .eval           = nft_ng_inc_map_eval,
-       .init           = nft_ng_inc_map_init,
-       .dump           = nft_ng_inc_map_dump,
-};
-
 static const struct nft_expr_ops nft_ng_random_ops = {
        .type           = &nft_ng_type,
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)),
@@ -290,14 +175,6 @@ static const struct nft_expr_ops nft_ng_random_ops = {
        .dump           = nft_ng_random_dump,
 };
 
-static const struct nft_expr_ops nft_ng_random_map_ops = {
-       .type           = &nft_ng_type,
-       .size           = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)),
-       .eval           = nft_ng_random_map_eval,
-       .init           = nft_ng_random_map_init,
-       .dump           = nft_ng_random_map_dump,
-};
-
 static const struct nft_expr_ops *
 nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
 {
@@ -312,12 +189,8 @@ nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
 
        switch (type) {
        case NFT_NG_INCREMENTAL:
-               if (tb[NFTA_NG_SET_NAME])
-                       return &nft_ng_inc_map_ops;
                return &nft_ng_inc_ops;
        case NFT_NG_RANDOM:
-               if (tb[NFTA_NG_SET_NAME])
-                       return &nft_ng_random_map_ops;
                return &nft_ng_random_ops;
        }
 
index ca5e5d8c5ef8b91cd61cb039d652f4549c343948..b13618c764ec296377778ee405b9067515ada25a 100644 (file)
@@ -50,7 +50,7 @@ static int nft_osf_init(const struct nft_ctx *ctx,
        int err;
        u8 ttl;
 
-       if (nla_get_u8(tb[NFTA_OSF_TTL])) {
+       if (tb[NFTA_OSF_TTL]) {
                ttl = nla_get_u8(tb[NFTA_OSF_TTL]);
                if (ttl > 2)
                        return -EINVAL;
index c6acfc2d9c8414d36173e3cf09f94ea64f0d7515..eb4cbd244c3d311e2630a4c4cae868f0343c30f0 100644 (file)
@@ -114,6 +114,22 @@ static void idletimer_tg_expired(struct timer_list *t)
        schedule_work(&timer->work);
 }
 
+static int idletimer_check_sysfs_name(const char *name, unsigned int size)
+{
+       int ret;
+
+       ret = xt_check_proc_name(name, size);
+       if (ret < 0)
+               return ret;
+
+       if (!strcmp(name, "power") ||
+           !strcmp(name, "subsystem") ||
+           !strcmp(name, "uevent"))
+               return -EINVAL;
+
+       return 0;
+}
+
 static int idletimer_tg_create(struct idletimer_tg_info *info)
 {
        int ret;
@@ -124,6 +140,10 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
                goto out;
        }
 
+       ret = idletimer_check_sysfs_name(info->label, sizeof(info->label));
+       if (ret < 0)
+               goto out_free_timer;
+
        sysfs_attr_init(&info->timer->attr.attr);
        info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
        if (!info->timer->attr.attr.name) {
index 6bec37ab4472796ecd1f453966b27bb911bf8fa8..a4660c48ff0149ad1798cd646d4edb2ed5c7770f 100644 (file)
@@ -1203,7 +1203,8 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
                                         &info->labels.mask);
                if (err)
                        return err;
-       } else if (labels_nonzero(&info->labels.mask)) {
+       } else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+                  labels_nonzero(&info->labels.mask)) {
                err = ovs_ct_set_labels(ct, key, &info->labels.value,
                                        &info->labels.mask);
                if (err)
index a70097ecf33c2bf9e9df7b92c2359ab679ae6d7e..865ecef68196900157b29c59b6bd57aff53e9e07 100644 (file)
@@ -3030,7 +3030,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                         * is already present */
                        if (mac_proto != MAC_PROTO_NONE)
                                return -EINVAL;
-                       mac_proto = MAC_PROTO_NONE;
+                       mac_proto = MAC_PROTO_ETHERNET;
                        break;
 
                case OVS_ACTION_ATTR_POP_ETH:
@@ -3038,7 +3038,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                                return -EINVAL;
                        if (vlan_tci & htons(VLAN_TAG_PRESENT))
                                return -EINVAL;
-                       mac_proto = MAC_PROTO_ETHERNET;
+                       mac_proto = MAC_PROTO_NONE;
                        break;
 
                case OVS_ACTION_ATTR_PUSH_NSH:
index 382196e57a26c137f03fea19cb2ff5d8d69c728b..bc628acf4f4ffe7172e1be6591811e056ccb4f1b 100644 (file)
@@ -611,6 +611,7 @@ struct rxrpc_call {
                                                 * not hard-ACK'd packet follows this.
                                                 */
        rxrpc_seq_t             tx_top;         /* Highest Tx slot allocated. */
+       u16                     tx_backoff;     /* Delay to insert due to Tx failure */
 
        /* TCP-style slow-start congestion control [RFC5681].  Since the SMSS
         * is fixed, we keep these numbers in terms of segments (ie. DATA
index 8e7434e92097e8f0a2676bcf87df090daf43ee2e..468efc3660c03805608d5e4f2f146f007e03f9b1 100644 (file)
@@ -123,6 +123,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
                else
                        ack_at = expiry;
 
+               ack_at += READ_ONCE(call->tx_backoff);
                ack_at += now;
                if (time_before(ack_at, call->ack_at)) {
                        WRITE_ONCE(call->ack_at, ack_at);
@@ -311,6 +312,7 @@ void rxrpc_process_call(struct work_struct *work)
                container_of(work, struct rxrpc_call, processor);
        rxrpc_serial_t *send_ack;
        unsigned long now, next, t;
+       unsigned int iterations = 0;
 
        rxrpc_see_call(call);
 
@@ -319,6 +321,11 @@ void rxrpc_process_call(struct work_struct *work)
               call->debug_id, rxrpc_call_states[call->state], call->events);
 
 recheck_state:
+       /* Limit the number of times we do this before returning to the manager */
+       iterations++;
+       if (iterations > 5)
+               goto requeue;
+
        if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
                rxrpc_send_abort_packet(call);
                goto recheck_state;
@@ -447,13 +454,16 @@ recheck_state:
        rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart);
 
        /* other events may have been raised since we started checking */
-       if (call->events && call->state < RXRPC_CALL_COMPLETE) {
-               __rxrpc_queue_call(call);
-               goto out;
-       }
+       if (call->events && call->state < RXRPC_CALL_COMPLETE)
+               goto requeue;
 
 out_put:
        rxrpc_put_call(call, rxrpc_call_put);
 out:
        _leave("");
+       return;
+
+requeue:
+       __rxrpc_queue_call(call);
+       goto out;
 }
index 1894188888391fca2ef98a5324de7bc99c4b381f..736aa92811004cfe5d157abd4827710783f8d57c 100644 (file)
@@ -34,6 +34,21 @@ struct rxrpc_abort_buffer {
 
 static const char rxrpc_keepalive_string[] = "";
 
+/*
+ * Increase Tx backoff on transmission failure and clear it on success.
+ */
+static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret)
+{
+       if (ret < 0) {
+               u16 tx_backoff = READ_ONCE(call->tx_backoff);
+
+               if (tx_backoff < HZ)
+                       WRITE_ONCE(call->tx_backoff, tx_backoff + 1);
+       } else {
+               WRITE_ONCE(call->tx_backoff, 0);
+       }
+}
+
 /*
  * Arrange for a keepalive ping a certain time after we last transmitted.  This
  * lets the far side know we're still interested in this call and helps keep
@@ -210,6 +225,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
        else
                trace_rxrpc_tx_packet(call->debug_id, &pkt->whdr,
                                      rxrpc_tx_point_call_ack);
+       rxrpc_tx_backoff(call, ret);
 
        if (call->state < RXRPC_CALL_COMPLETE) {
                if (ret < 0) {
@@ -218,7 +234,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
                        rxrpc_propose_ACK(call, pkt->ack.reason,
                                          ntohs(pkt->ack.maxSkew),
                                          ntohl(pkt->ack.serial),
-                                         true, true,
+                                         false, true,
                                          rxrpc_propose_ack_retry_tx);
                } else {
                        spin_lock_bh(&call->lock);
@@ -300,7 +316,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
        else
                trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr,
                                      rxrpc_tx_point_call_abort);
-
+       rxrpc_tx_backoff(call, ret);
 
        rxrpc_put_connection(conn);
        return ret;
@@ -413,6 +429,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
        else
                trace_rxrpc_tx_packet(call->debug_id, &whdr,
                                      rxrpc_tx_point_call_data_nofrag);
+       rxrpc_tx_backoff(call, ret);
        if (ret == -EMSGSIZE)
                goto send_fragmentable;
 
@@ -445,9 +462,18 @@ done:
                        rxrpc_reduce_call_timer(call, expect_rx_by, nowj,
                                                rxrpc_timer_set_for_normal);
                }
-       }
 
-       rxrpc_set_keepalive(call);
+               rxrpc_set_keepalive(call);
+       } else {
+               /* Cancel the call if the initial transmission fails,
+                * particularly if that's due to network routing issues that
+                * aren't going away anytime soon.  The layer above can arrange
+                * the retransmission.
+                */
+               if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags))
+                       rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+                                                 RX_USER_ABORT, ret);
+       }
 
        _leave(" = %d [%u]", ret, call->peer->maxdata);
        return ret;
@@ -506,6 +532,7 @@ send_fragmentable:
        else
                trace_rxrpc_tx_packet(call->debug_id, &whdr,
                                      rxrpc_tx_point_call_data_frag);
+       rxrpc_tx_backoff(call, ret);
 
        up_write(&conn->params.local->defrag_sem);
        goto done;
index a827a1f562bf323d03cd5e70ffce67da53401a61..6a28b96e779e68d5259138e11da17c1216e18b71 100644 (file)
@@ -499,8 +499,9 @@ void sctp_assoc_set_primary(struct sctp_association *asoc,
 void sctp_assoc_rm_peer(struct sctp_association *asoc,
                        struct sctp_transport *peer)
 {
-       struct list_head        *pos;
-       struct sctp_transport   *transport;
+       struct sctp_transport *transport;
+       struct list_head *pos;
+       struct sctp_chunk *ch;
 
        pr_debug("%s: association:%p addr:%pISpc\n",
                 __func__, asoc, &peer->ipaddr.sa);
@@ -564,7 +565,6 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
         */
        if (!list_empty(&peer->transmitted)) {
                struct sctp_transport *active = asoc->peer.active_path;
-               struct sctp_chunk *ch;
 
                /* Reset the transport of each chunk on this list */
                list_for_each_entry(ch, &peer->transmitted,
@@ -586,6 +586,10 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
                                sctp_transport_hold(active);
        }
 
+       list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list)
+               if (ch->transport == peer)
+                       ch->transport = NULL;
+
        asoc->peer.transport_count--;
 
        sctp_transport_free(peer);
index 9cb854b05342e57a6743ee1fd7e91cab7c09bbd2..c37e1c2dec9d451f5bfc8ffd8a0f8b9d00358316 100644 (file)
@@ -212,7 +212,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
        INIT_LIST_HEAD(&q->retransmit);
        INIT_LIST_HEAD(&q->sacked);
        INIT_LIST_HEAD(&q->abandoned);
-       sctp_sched_set_sched(asoc, SCTP_SS_FCFS);
+       sctp_sched_set_sched(asoc, SCTP_SS_DEFAULT);
 }
 
 /* Free the outqueue structure and any related pending chunks.
index fc0386e8ff23933a0e5eae47661d39d8799b6a66..739f3e50120ddf61c508cc677b46e0239054131c 100644 (file)
@@ -7083,14 +7083,15 @@ static int sctp_getsockopt_pr_assocstatus(struct sock *sk, int len,
        }
 
        policy = params.sprstat_policy;
-       if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)))
+       if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)) ||
+           ((policy & SCTP_PR_SCTP_ALL) && (policy & SCTP_PR_SCTP_MASK)))
                goto out;
 
        asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
        if (!asoc)
                goto out;
 
-       if (policy & SCTP_PR_SCTP_ALL) {
+       if (policy == SCTP_PR_SCTP_ALL) {
                params.sprstat_abandoned_unsent = 0;
                params.sprstat_abandoned_sent = 0;
                for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
@@ -7142,7 +7143,8 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
        }
 
        policy = params.sprstat_policy;
-       if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)))
+       if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)) ||
+           ((policy & SCTP_PR_SCTP_ALL) && (policy & SCTP_PR_SCTP_MASK)))
                goto out;
 
        asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
index 52241d679cc91cd3b6ec59a4388d33b68c76f1f1..89c3a8c7859a3ae11a63d8db5e8b42e0ca26a897 100644 (file)
@@ -286,7 +286,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
         */
        krflags = MSG_PEEK | MSG_WAITALL;
        smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
-       iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1,
+       iov_iter_kvec(&msg.msg_iter, READ, &vec, 1,
                        sizeof(struct smc_clc_msg_hdr));
        len = sock_recvmsg(smc->clcsock, &msg, krflags);
        if (signal_pending(current)) {
@@ -325,7 +325,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 
        /* receive the complete CLC message */
        memset(&msg, 0, sizeof(struct msghdr));
-       iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, datlen);
+       iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, datlen);
        krflags = MSG_WAITALL;
        len = sock_recvmsg(smc->clcsock, &msg, krflags);
        if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
index 99c96851469f4cfa1aa7e8bd9606e5e487b6bee3..593826e11a5376bd1301b2cf19c61c377653c653 100644 (file)
@@ -635,7 +635,7 @@ EXPORT_SYMBOL(sock_sendmsg);
 int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
                   struct kvec *vec, size_t num, size_t size)
 {
-       iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
+       iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
        return sock_sendmsg(sock, msg);
 }
 EXPORT_SYMBOL(kernel_sendmsg);
@@ -648,7 +648,7 @@ int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
        if (!sock->ops->sendmsg_locked)
                return sock_no_sendmsg_locked(sk, msg, size);
 
-       iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
+       iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
 
        return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
 }
@@ -823,7 +823,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
        mm_segment_t oldfs = get_fs();
        int result;
 
-       iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
+       iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
        set_fs(KERNEL_DS);
        result = sock_recvmsg(sock, msg, flags);
        set_fs(oldfs);
index 7f0424dfa8f6df7c5d5d2ce7c2c20b12ab366e30..eab71fc7af3e00ff72a339c1f3d7f1e910358e89 100644 (file)
@@ -274,6 +274,7 @@ out_err:
 static int
 gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
 {
+       u32 seq_send;
        int tmp;
 
        p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
@@ -315,9 +316,10 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
        p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
        if (IS_ERR(p))
                goto out_err;
-       p = simple_get_bytes(p, end, &ctx->seq_send, sizeof(ctx->seq_send));
+       p = simple_get_bytes(p, end, &seq_send, sizeof(seq_send));
        if (IS_ERR(p))
                goto out_err;
+       atomic_set(&ctx->seq_send, seq_send);
        p = simple_get_netobj(p, end, &ctx->mech_used);
        if (IS_ERR(p))
                goto out_err;
@@ -607,6 +609,7 @@ static int
 gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
                gfp_t gfp_mask)
 {
+       u64 seq_send64;
        int keylen;
 
        p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags));
@@ -617,14 +620,15 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
        p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
        if (IS_ERR(p))
                goto out_err;
-       p = simple_get_bytes(p, end, &ctx->seq_send64, sizeof(ctx->seq_send64));
+       p = simple_get_bytes(p, end, &seq_send64, sizeof(seq_send64));
        if (IS_ERR(p))
                goto out_err;
+       atomic64_set(&ctx->seq_send64, seq_send64);
        /* set seq_send for use by "older" enctypes */
-       ctx->seq_send = ctx->seq_send64;
-       if (ctx->seq_send64 != ctx->seq_send) {
-               dprintk("%s: seq_send64 %lx, seq_send %x overflow?\n", __func__,
-                       (unsigned long)ctx->seq_send64, ctx->seq_send);
+       atomic_set(&ctx->seq_send, seq_send64);
+       if (seq_send64 != atomic_read(&ctx->seq_send)) {
+               dprintk("%s: seq_send64 %llx, seq_send %x overflow?\n", __func__,
+                       seq_send64, atomic_read(&ctx->seq_send));
                p = ERR_PTR(-EINVAL);
                goto out_err;
        }
index b4adeb06660b15f6ffad21e5e0d79a88b61b1fd5..48fe4a591b543bb5f29969e1713ac0c5d51f7a5a 100644 (file)
@@ -123,30 +123,6 @@ setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
        return krb5_hdr;
 }
 
-u32
-gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx)
-{
-       u32 old, seq_send = READ_ONCE(ctx->seq_send);
-
-       do {
-               old = seq_send;
-               seq_send = cmpxchg(&ctx->seq_send, old, old + 1);
-       } while (old != seq_send);
-       return seq_send;
-}
-
-u64
-gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx)
-{
-       u64 old, seq_send = READ_ONCE(ctx->seq_send);
-
-       do {
-               old = seq_send;
-               seq_send = cmpxchg64(&ctx->seq_send64, old, old + 1);
-       } while (old != seq_send);
-       return seq_send;
-}
-
 static u32
 gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
                struct xdr_netobj *token)
@@ -177,7 +153,7 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
 
        memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
 
-       seq_send = gss_seq_send_fetch_and_inc(ctx);
+       seq_send = atomic_fetch_inc(&ctx->seq_send);
 
        if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
                              seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
@@ -205,7 +181,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
 
        /* Set up the sequence number. Now 64-bits in clear
         * text and w/o direction indicator */
-       seq_send_be64 = cpu_to_be64(gss_seq_send64_fetch_and_inc(ctx));
+       seq_send_be64 = cpu_to_be64(atomic64_fetch_inc(&ctx->seq_send64));
        memcpy(krb5_hdr + 8, (char *) &seq_send_be64, 8);
 
        if (ctx->initiate) {
index 962fa84e6db114f95790f8d6bba485fe226ed43e..5cdde6cb703a423ff48682f86e5275e331bbe242 100644 (file)
@@ -228,7 +228,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 
        memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
 
-       seq_send = gss_seq_send_fetch_and_inc(kctx);
+       seq_send = atomic_fetch_inc(&kctx->seq_send);
 
        /* XXX would probably be more efficient to compute checksum
         * and encrypt at the same time: */
@@ -475,7 +475,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
        *be16ptr++ = 0;
 
        be64ptr = (__be64 *)be16ptr;
-       *be64ptr = cpu_to_be64(gss_seq_send64_fetch_and_inc(kctx));
+       *be64ptr = cpu_to_be64(atomic64_fetch_inc(&kctx->seq_send64));
 
        err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages);
        if (err)
index 3b525accaa6857bc76cda9a8b9f131791fcd1f26..986f3ed7d1a24800d31713143aebffc45e32fc16 100644 (file)
@@ -336,7 +336,7 @@ static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov,
        rqstp->rq_xprt_hlen = 0;
 
        clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
-       iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nr, buflen);
+       iov_iter_kvec(&msg.msg_iter, READ, iov, nr, buflen);
        if (base != 0) {
                iov_iter_advance(&msg.msg_iter, base);
                buflen -= base;
index 1b51e04d356609f37b9e8042768a083d31391600..ae77c71c1f640c32fd8d673934be412da9da0189 100644 (file)
@@ -361,7 +361,7 @@ static ssize_t
 xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags,
                struct kvec *kvec, size_t count, size_t seek)
 {
-       iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, kvec, 1, count);
+       iov_iter_kvec(&msg->msg_iter, READ, kvec, 1, count);
        return xs_sock_recvmsg(sock, msg, flags, seek);
 }
 
@@ -370,7 +370,7 @@ xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags,
                struct bio_vec *bvec, unsigned long nr, size_t count,
                size_t seek)
 {
-       iov_iter_bvec(&msg->msg_iter, READ | ITER_BVEC, bvec, nr, count);
+       iov_iter_bvec(&msg->msg_iter, READ, bvec, nr, count);
        return xs_sock_recvmsg(sock, msg, flags, seek);
 }
 
index 4bdea00571711d16894e991bebee8d878955e97d..efb16f69bd2c4c0bc012d5f18c3ad42e71260e81 100644 (file)
@@ -394,7 +394,7 @@ static int tipc_conn_rcv_from_sock(struct tipc_conn *con)
        iov.iov_base = &s;
        iov.iov_len = sizeof(s);
        msg.msg_name = NULL;
-       iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
+       iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, iov.iov_len);
        ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
        if (ret == -EWOULDBLOCK)
                return -EWOULDBLOCK;
index 276edbc04f3859efe96540e8e4705e730bcba56f..d753e362d2d9e625b9d9c4c476cc06cdcbc5430a 100644 (file)
@@ -489,7 +489,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
 
        iov.iov_base = kaddr + offset;
        iov.iov_len = size;
-       iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, &iov, 1, size);
+       iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size);
        rc = tls_push_data(sk, &msg_iter, size,
                           flags, TLS_RECORD_TYPE_DATA);
        kunmap(page);
@@ -538,7 +538,7 @@ static int tls_device_push_pending_record(struct sock *sk, int flags)
 {
        struct iov_iter msg_iter;
 
-       iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
+       iov_iter_kvec(&msg_iter, WRITE, NULL, 0, 0);
        return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA);
 }
 
index 5cd88ba8acd175bc013cc95917120d8eb6899b99..7b1af8b59cd20e56841c437aa97d981fa89be864 100644 (file)
@@ -799,7 +799,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
        struct crypto_tfm *tfm = crypto_aead_tfm(ctx->aead_send);
        bool async_capable = tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
        unsigned char record_type = TLS_RECORD_TYPE_DATA;
-       bool is_kvec = msg->msg_iter.type & ITER_KVEC;
+       bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
        bool eor = !(msg->msg_flags & MSG_MORE);
        size_t try_to_copy, copied = 0;
        struct sk_msg *msg_pl, *msg_en;
@@ -1457,7 +1457,7 @@ int tls_sw_recvmsg(struct sock *sk,
        bool cmsg = false;
        int target, err = 0;
        long timeo;
-       bool is_kvec = msg->msg_iter.type & ITER_KVEC;
+       bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
        int num_async = 0;
 
        flags |= nonblock;
index 4a9ee2d83158ba87a4da985af1020faae8c440b7..140270a13d54f7c69584fa6aefbf6b1be0941ec6 100644 (file)
@@ -8,7 +8,6 @@ config XFRM
 
 config XFRM_OFFLOAD
        bool
-       depends on XFRM
 
 config XFRM_ALGO
        tristate
index b669262682c9763e7c863d6bb77f44ed34402cce..dc4a9f1fb941a8eef7f1a3b68563c17abcf4d919 100644 (file)
@@ -2077,10 +2077,8 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
        struct xfrm_mgr *km;
        struct xfrm_policy *pol = NULL;
 
-#ifdef CONFIG_COMPAT
        if (in_compat_syscall())
                return -EOPNOTSUPP;
-#endif
 
        if (!optval && !optlen) {
                xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
index ca7a207b81a9587c942dd8763e4444cc5675f1ee..c9a84e22f5d578216cd59687e293ed3a078cd565 100644 (file)
@@ -2621,10 +2621,8 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
        const struct xfrm_link *link;
        int type, err;
 
-#ifdef CONFIG_COMPAT
        if (in_compat_syscall())
                return -EOPNOTSUPP;
-#endif
 
        type = nlh->nlmsg_type;
        if (type > XFRM_MSG_MAX)
index ca21a35fa244e77fd3c503d28f2f537c6fb8a7d4..bb015551c2d9ae11c67f276cc64d7f84ef5521fc 100644 (file)
@@ -140,17 +140,9 @@ cc-option-yn = $(call try-run,\
 cc-disable-warning = $(call try-run,\
        $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
 
-# cc-name
-# Expands to either gcc or clang
-cc-name = $(shell $(CC) -v 2>&1 | grep -q "clang version" && echo clang || echo gcc)
-
 # cc-version
 cc-version = $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-version.sh $(CC))
 
-# cc-fullversion
-cc-fullversion = $(shell $(CONFIG_SHELL) \
-       $(srctree)/scripts/gcc-version.sh -p $(CC))
-
 # cc-ifversion
 # Usage:  EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1)
 cc-ifversion = $(shell [ $(cc-version) $(1) $(2) ] && echo $(3) || echo $(4))
index 24b2fb1d12974d1507705f475b52aedc81c9be5c..768306add59131c67e2ec848c27c31032b8fae71 100644 (file)
@@ -29,6 +29,7 @@ warning-1 += $(call cc-option, -Wmissing-include-dirs)
 warning-1 += $(call cc-option, -Wunused-but-set-variable)
 warning-1 += $(call cc-option, -Wunused-const-variable)
 warning-1 += $(call cc-option, -Wpacked-not-aligned)
+warning-1 += $(call cc-option, -Wstringop-truncation)
 warning-1 += $(call cc-disable-warning, missing-field-initializers)
 warning-1 += $(call cc-disable-warning, sign-compare)
 
@@ -64,7 +65,7 @@ endif
 KBUILD_CFLAGS += $(warning)
 else
 
-ifeq ($(cc-name),clang)
+ifdef CONFIG_CC_IS_CLANG
 KBUILD_CFLAGS += $(call cc-disable-warning, initializer-overrides)
 KBUILD_CFLAGS += $(call cc-disable-warning, unused-value)
 KBUILD_CFLAGS += $(call cc-disable-warning, format)
index 0a482f341576766793cb7ac85601e92e5c0e8c8d..46c5c680980657dfc8997d5127bfab198b710490 100644 (file)
@@ -26,6 +26,16 @@ gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_RANDSTRUCT)            \
 gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_RANDSTRUCT_PERFORMANCE)  \
                += -fplugin-arg-randomize_layout_plugin-performance-mode
 
+gcc-plugin-$(CONFIG_GCC_PLUGIN_STACKLEAK)      += stackleak_plugin.so
+gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK)               \
+               += -DSTACKLEAK_PLUGIN
+gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK)               \
+               += -fplugin-arg-stackleak_plugin-track-min-size=$(CONFIG_STACKLEAK_TRACK_MIN_SIZE)
+ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+    DISABLE_STACKLEAK_PLUGIN += -fplugin-arg-stackleak_plugin-disable
+endif
+export DISABLE_STACKLEAK_PLUGIN
+
 # All the plugin CFLAGS are collected here in case a build target needs to
 # filter them out of the KBUILD_CFLAGS.
 GCC_PLUGINS_CFLAGS := $(strip $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y)) $(gcc-plugin-cflags-y))
index cb0c889e13aa05818e222bbfd2cef9f082a29655..0d5c799688f0ae31fa02f82dc8451ad1a5811ff5 100644 (file)
@@ -139,4 +139,55 @@ config GCC_PLUGIN_RANDSTRUCT_PERFORMANCE
          in structures.  This reduces the performance hit of RANDSTRUCT
          at the cost of weakened randomization.
 
+config GCC_PLUGIN_STACKLEAK
+       bool "Erase the kernel stack before returning from syscalls"
+       depends on GCC_PLUGINS
+       depends on HAVE_ARCH_STACKLEAK
+       help
+         This option makes the kernel erase the kernel stack before
+         returning from system calls. That reduces the information which
+         kernel stack leak bugs can reveal and blocks some uninitialized
+         stack variable attacks.
+
+         The tradeoff is the performance impact: on a single CPU system kernel
+         compilation sees a 1% slowdown, other systems and workloads may vary
+         and you are advised to test this feature on your expected workload
+         before deploying it.
+
+         This plugin was ported from grsecurity/PaX. More information at:
+          * https://grsecurity.net/
+          * https://pax.grsecurity.net/
+
+config STACKLEAK_TRACK_MIN_SIZE
+       int "Minimum stack frame size of functions tracked by STACKLEAK"
+       default 100
+       range 0 4096
+       depends on GCC_PLUGIN_STACKLEAK
+       help
+         The STACKLEAK gcc plugin instruments the kernel code for tracking
+         the lowest border of the kernel stack (and for some other purposes).
+         It inserts the stackleak_track_stack() call for the functions with
+         a stack frame size greater than or equal to this parameter.
+         If unsure, leave the default value 100.
+
+config STACKLEAK_METRICS
+       bool "Show STACKLEAK metrics in the /proc file system"
+       depends on GCC_PLUGIN_STACKLEAK
+       depends on PROC_FS
+       help
+         If this is set, STACKLEAK metrics for every task are available in
+         the /proc file system. In particular, /proc/<pid>/stack_depth
+         shows the maximum kernel stack consumption for the current and
+         previous syscalls. Although this information is not precise, it
+         can be useful for estimating the STACKLEAK performance impact for
+         your workloads.
+
+config STACKLEAK_RUNTIME_DISABLE
+       bool "Allow runtime disabling of kernel stack erasing"
+       depends on GCC_PLUGIN_STACKLEAK
+       help
+         This option provides 'stack_erasing' sysctl, which can be used in
+         runtime to control kernel stack erasing for kernels built with
+         CONFIG_GCC_PLUGIN_STACKLEAK.
+
 endif
diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c
new file mode 100644 (file)
index 0000000..2f48da9
--- /dev/null
@@ -0,0 +1,427 @@
+/*
+ * Copyright 2011-2017 by the PaX Team <pageexec@freemail.hu>
+ * Modified by Alexander Popov <alex.popov@linux.com>
+ * Licensed under the GPL v2
+ *
+ * Note: the choice of the license means that the compilation process is
+ * NOT 'eligible' as defined by gcc's library exception to the GPL v3,
+ * but for the kernel it doesn't matter since it doesn't link against
+ * any of the gcc libraries
+ *
+ * This gcc plugin is needed for tracking the lowest border of the kernel stack.
+ * It instruments the kernel code inserting stackleak_track_stack() calls:
+ *  - after alloca();
+ *  - for the functions with a stack frame size greater than or equal
+ *     to the "track-min-size" plugin parameter.
+ *
+ * This plugin is ported from grsecurity/PaX. For more information see:
+ *   https://grsecurity.net/
+ *   https://pax.grsecurity.net/
+ *
+ * Debugging:
+ *  - use fprintf() to stderr, debug_generic_expr(), debug_gimple_stmt(),
+ *     print_rtl() and print_simple_rtl();
+ *  - add "-fdump-tree-all -fdump-rtl-all" to the plugin CFLAGS in
+ *     Makefile.gcc-plugins to see the verbose dumps of the gcc passes;
+ *  - use gcc -E to understand the preprocessing shenanigans;
+ *  - use gcc with enabled CFG/GIMPLE/SSA verification (--enable-checking).
+ */
+
+#include "gcc-common.h"
+
+__visible int plugin_is_GPL_compatible;
+
+static int track_frame_size = -1;
+static const char track_function[] = "stackleak_track_stack";
+
+/*
+ * Mark these global variables (roots) for gcc garbage collector since
+ * they point to the garbage-collected memory.
+ */
+static GTY(()) tree track_function_decl;
+
+static struct plugin_info stackleak_plugin_info = {
+       .version = "201707101337",
+       .help = "track-min-size=nn\ttrack stack for functions with a stack frame size >= nn bytes\n"
+               "disable\t\tdo not activate the plugin\n"
+};
+
+static void stackleak_add_track_stack(gimple_stmt_iterator *gsi, bool after)
+{
+       gimple stmt;
+       gcall *stackleak_track_stack;
+       cgraph_node_ptr node;
+       int frequency;
+       basic_block bb;
+
+       /* Insert call to void stackleak_track_stack(void) */
+       stmt = gimple_build_call(track_function_decl, 0);
+       stackleak_track_stack = as_a_gcall(stmt);
+       if (after) {
+               gsi_insert_after(gsi, stackleak_track_stack,
+                                               GSI_CONTINUE_LINKING);
+       } else {
+               gsi_insert_before(gsi, stackleak_track_stack, GSI_SAME_STMT);
+       }
+
+       /* Update the cgraph */
+       bb = gimple_bb(stackleak_track_stack);
+       node = cgraph_get_create_node(track_function_decl);
+       gcc_assert(node);
+       frequency = compute_call_stmt_bb_frequency(current_function_decl, bb);
+       cgraph_create_edge(cgraph_get_node(current_function_decl), node,
+                       stackleak_track_stack, bb->count, frequency);
+}
+
+static bool is_alloca(gimple stmt)
+{
+       if (gimple_call_builtin_p(stmt, BUILT_IN_ALLOCA))
+               return true;
+
+#if BUILDING_GCC_VERSION >= 4007
+       if (gimple_call_builtin_p(stmt, BUILT_IN_ALLOCA_WITH_ALIGN))
+               return true;
+#endif
+
+       return false;
+}
+
+/*
+ * Work with the GIMPLE representation of the code. Insert the
+ * stackleak_track_stack() call after alloca() and into the beginning
+ * of the function if it is not instrumented.
+ */
+static unsigned int stackleak_instrument_execute(void)
+{
+       basic_block bb, entry_bb;
+       bool prologue_instrumented = false, is_leaf = true;
+       gimple_stmt_iterator gsi;
+
+       /*
+        * ENTRY_BLOCK_PTR is a basic block which represents possible entry
+        * point of a function. This block does not contain any code and
+        * has a CFG edge to its successor.
+        */
+       gcc_assert(single_succ_p(ENTRY_BLOCK_PTR_FOR_FN(cfun)));
+       entry_bb = single_succ(ENTRY_BLOCK_PTR_FOR_FN(cfun));
+
+       /*
+        * Loop through the GIMPLE statements in each of cfun basic blocks.
+        * cfun is a global variable which represents the function that is
+        * currently processed.
+        */
+       FOR_EACH_BB_FN(bb, cfun) {
+               for (gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) {
+                       gimple stmt;
+
+                       stmt = gsi_stmt(gsi);
+
+                       /* Leaf function is a function which makes no calls */
+                       if (is_gimple_call(stmt))
+                               is_leaf = false;
+
+                       if (!is_alloca(stmt))
+                               continue;
+
+                       /* Insert stackleak_track_stack() call after alloca() */
+                       stackleak_add_track_stack(&gsi, true);
+                       if (bb == entry_bb)
+                               prologue_instrumented = true;
+               }
+       }
+
+       if (prologue_instrumented)
+               return 0;
+
+       /*
+        * Special cases to skip the instrumentation.
+        *
+        * Taking the address of static inline functions materializes them,
+        * but we mustn't instrument some of them as the resulting stack
+        * alignment required by the function call ABI will break other
+        * assumptions regarding the expected (but not otherwise enforced)
+        * register clobbering ABI.
+        *
+        * Case in point: native_save_fl on amd64 when optimized for size
+        * clobbers rdx if it were instrumented here.
+        *
+        * TODO: any more special cases?
+        */
+       if (is_leaf &&
+           !TREE_PUBLIC(current_function_decl) &&
+           DECL_DECLARED_INLINE_P(current_function_decl)) {
+               return 0;
+       }
+
+       if (is_leaf &&
+           !strncmp(IDENTIFIER_POINTER(DECL_NAME(current_function_decl)),
+                    "_paravirt_", 10)) {
+               return 0;
+       }
+
+       /* Insert stackleak_track_stack() call at the function beginning */
+       bb = entry_bb;
+       if (!single_pred_p(bb)) {
+               /* gcc_assert(bb_loop_depth(bb) ||
+                               (bb->flags & BB_IRREDUCIBLE_LOOP)); */
+               split_edge(single_succ_edge(ENTRY_BLOCK_PTR_FOR_FN(cfun)));
+               gcc_assert(single_succ_p(ENTRY_BLOCK_PTR_FOR_FN(cfun)));
+               bb = single_succ(ENTRY_BLOCK_PTR_FOR_FN(cfun));
+       }
+       gsi = gsi_after_labels(bb);
+       stackleak_add_track_stack(&gsi, false);
+
+       return 0;
+}
+
+static bool large_stack_frame(void)
+{
+#if BUILDING_GCC_VERSION >= 8000
+       return maybe_ge(get_frame_size(), track_frame_size);
+#else
+       return (get_frame_size() >= track_frame_size);
+#endif
+}
+
+/*
+ * Work with the RTL representation of the code.
+ * Remove the unneeded stackleak_track_stack() calls from the functions
+ * which don't call alloca() and don't have a large enough stack frame size.
+ */
+static unsigned int stackleak_cleanup_execute(void)
+{
+       rtx_insn *insn, *next;
+
+       if (cfun->calls_alloca)
+               return 0;
+
+       if (large_stack_frame())
+               return 0;
+
+       /*
+        * Find stackleak_track_stack() calls. Loop through the chain of insns,
+        * which is an RTL representation of the code for a function.
+        *
+        * The example of a matching insn:
+        *  (call_insn 8 4 10 2 (call (mem (symbol_ref ("stackleak_track_stack")
+        *  [flags 0x41] <function_decl 0x7f7cd3302a80 stackleak_track_stack>)
+        *  [0 stackleak_track_stack S1 A8]) (0)) 675 {*call} (expr_list
+        *  (symbol_ref ("stackleak_track_stack") [flags 0x41] <function_decl
+        *  0x7f7cd3302a80 stackleak_track_stack>) (expr_list (0) (nil))) (nil))
+        */
+       for (insn = get_insns(); insn; insn = next) {
+               rtx body;
+
+               next = NEXT_INSN(insn);
+
+               /* Check the expression code of the insn */
+               if (!CALL_P(insn))
+                       continue;
+
+               /*
+                * Check the expression code of the insn body, which is an RTL
+                * Expression (RTX) describing the side effect performed by
+                * that insn.
+                */
+               body = PATTERN(insn);
+
+               if (GET_CODE(body) == PARALLEL)
+                       body = XVECEXP(body, 0, 0);
+
+               if (GET_CODE(body) != CALL)
+                       continue;
+
+               /*
+                * Check the first operand of the call expression. It should
+                * be a mem RTX describing the needed subroutine with a
+                * symbol_ref RTX.
+                */
+               body = XEXP(body, 0);
+               if (GET_CODE(body) != MEM)
+                       continue;
+
+               body = XEXP(body, 0);
+               if (GET_CODE(body) != SYMBOL_REF)
+                       continue;
+
+               if (SYMBOL_REF_DECL(body) != track_function_decl)
+                       continue;
+
+               /* Delete the stackleak_track_stack() call */
+               delete_insn_and_edges(insn);
+#if BUILDING_GCC_VERSION >= 4007 && BUILDING_GCC_VERSION < 8000
+               if (GET_CODE(next) == NOTE &&
+                   NOTE_KIND(next) == NOTE_INSN_CALL_ARG_LOCATION) {
+                       insn = next;
+                       next = NEXT_INSN(insn);
+                       delete_insn_and_edges(insn);
+               }
+#endif
+       }
+
+       return 0;
+}
+
+static bool stackleak_gate(void)
+{
+       tree section;
+
+       section = lookup_attribute("section",
+                                  DECL_ATTRIBUTES(current_function_decl));
+       if (section && TREE_VALUE(section)) {
+               section = TREE_VALUE(TREE_VALUE(section));
+
+               if (!strncmp(TREE_STRING_POINTER(section), ".init.text", 10))
+                       return false;
+               if (!strncmp(TREE_STRING_POINTER(section), ".devinit.text", 13))
+                       return false;
+               if (!strncmp(TREE_STRING_POINTER(section), ".cpuinit.text", 13))
+                       return false;
+               if (!strncmp(TREE_STRING_POINTER(section), ".meminit.text", 13))
+                       return false;
+       }
+
+       return track_frame_size >= 0;
+}
+
+/* Build the function declaration for stackleak_track_stack() */
+static void stackleak_start_unit(void *gcc_data __unused,
+                                void *user_data __unused)
+{
+       tree fntype;
+
+       /* void stackleak_track_stack(void) */
+       fntype = build_function_type_list(void_type_node, NULL_TREE);
+       track_function_decl = build_fn_decl(track_function, fntype);
+       DECL_ASSEMBLER_NAME(track_function_decl); /* for LTO */
+       TREE_PUBLIC(track_function_decl) = 1;
+       TREE_USED(track_function_decl) = 1;
+       DECL_EXTERNAL(track_function_decl) = 1;
+       DECL_ARTIFICIAL(track_function_decl) = 1;
+       DECL_PRESERVE_P(track_function_decl) = 1;
+}
+
+/*
+ * Pass gate function is a predicate function that gets executed before the
+ * corresponding pass. If the return value is 'true' the pass gets executed,
+ * otherwise, it is skipped.
+ */
+static bool stackleak_instrument_gate(void)
+{
+       return stackleak_gate();
+}
+
+#define PASS_NAME stackleak_instrument
+#define PROPERTIES_REQUIRED PROP_gimple_leh | PROP_cfg
+#define TODO_FLAGS_START TODO_verify_ssa | TODO_verify_flow | TODO_verify_stmts
+#define TODO_FLAGS_FINISH TODO_verify_ssa | TODO_verify_stmts | TODO_dump_func \
+                       | TODO_update_ssa | TODO_rebuild_cgraph_edges
+#include "gcc-generate-gimple-pass.h"
+
+static bool stackleak_cleanup_gate(void)
+{
+       return stackleak_gate();
+}
+
+#define PASS_NAME stackleak_cleanup
+#define TODO_FLAGS_FINISH TODO_dump_func
+#include "gcc-generate-rtl-pass.h"
+
+/*
+ * Every gcc plugin exports a plugin_init() function that is called right
+ * after the plugin is loaded. This function is responsible for registering
+ * the plugin callbacks and doing other required initialization.
+ */
+__visible int plugin_init(struct plugin_name_args *plugin_info,
+                         struct plugin_gcc_version *version)
+{
+       const char * const plugin_name = plugin_info->base_name;
+       const int argc = plugin_info->argc;
+       const struct plugin_argument * const argv = plugin_info->argv;
+       int i = 0;
+
+       /* Extra GGC root tables describing our GTY-ed data */
+       static const struct ggc_root_tab gt_ggc_r_gt_stackleak[] = {
+               {
+                       .base = &track_function_decl,
+                       .nelt = 1,
+                       .stride = sizeof(track_function_decl),
+                       .cb = &gt_ggc_mx_tree_node,
+                       .pchw = &gt_pch_nx_tree_node
+               },
+               LAST_GGC_ROOT_TAB
+       };
+
+       /*
+        * The stackleak_instrument pass should be executed before the
+        * "optimized" pass, which is the control flow graph cleanup that is
+        * performed just before expanding gcc trees to the RTL. In former
+        * versions of the plugin this new pass was inserted before the
+        * "tree_profile" pass, which is currently called "profile".
+        */
+       PASS_INFO(stackleak_instrument, "optimized", 1,
+                                               PASS_POS_INSERT_BEFORE);
+
+       /*
+        * The stackleak_cleanup pass should be executed after the
+        * "reload" pass, when the stack frame size is final.
+        */
+       PASS_INFO(stackleak_cleanup, "reload", 1, PASS_POS_INSERT_AFTER);
+
+       if (!plugin_default_version_check(version, &gcc_version)) {
+               error(G_("incompatible gcc/plugin versions"));
+               return 1;
+       }
+
+       /* Parse the plugin arguments */
+       for (i = 0; i < argc; i++) {
+               if (!strcmp(argv[i].key, "disable"))
+                       return 0;
+
+               if (!strcmp(argv[i].key, "track-min-size")) {
+                       if (!argv[i].value) {
+                               error(G_("no value supplied for option '-fplugin-arg-%s-%s'"),
+                                       plugin_name, argv[i].key);
+                               return 1;
+                       }
+
+                       track_frame_size = atoi(argv[i].value);
+                       if (track_frame_size < 0) {
+                               error(G_("invalid option argument '-fplugin-arg-%s-%s=%s'"),
+                                       plugin_name, argv[i].key, argv[i].value);
+                               return 1;
+                       }
+               } else {
+                       error(G_("unknown option '-fplugin-arg-%s-%s'"),
+                                       plugin_name, argv[i].key);
+                       return 1;
+               }
+       }
+
+       /* Give the information about the plugin */
+       register_callback(plugin_name, PLUGIN_INFO, NULL,
+                                               &stackleak_plugin_info);
+
+       /* Register to be called before processing a translation unit */
+       register_callback(plugin_name, PLUGIN_START_UNIT,
+                                       &stackleak_start_unit, NULL);
+
+       /* Register an extra GCC garbage collector (GGC) root table */
+       register_callback(plugin_name, PLUGIN_REGISTER_GGC_ROOTS, NULL,
+                                       (void *)&gt_ggc_r_gt_stackleak);
+
+       /*
+        * Hook into the Pass Manager to register new gcc passes.
+        *
+        * The stack frame size info is available only at the last RTL pass,
+        * when it's too late to insert complex code like a function call.
+        * So we register two gcc passes to instrument every function at first
+        * and remove the unneeded instrumentation later.
+        */
+       register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL,
+                                       &stackleak_instrument_pass_info);
+       register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL,
+                                       &stackleak_cleanup_pass_info);
+
+       return 0;
+}
index 67ed9f6ccdf8f09d8cb193d864950ed4e14ed9f7..63b609243d03783beb1da485aaaa1e1d6f3c78eb 100644 (file)
@@ -68,21 +68,7 @@ PHONY += $(simple-targets)
 $(simple-targets): $(obj)/conf
        $< $(silent) --$@ $(Kconfig)
 
-PHONY += oldnoconfig silentoldconfig savedefconfig defconfig
-
-# oldnoconfig is an alias of olddefconfig, because people already are dependent
-# on its behavior (sets new symbols to their default value but not 'n') with the
-# counter-intuitive name.
-oldnoconfig: olddefconfig
-       @echo "  WARNING: \"oldnoconfig\" target will be removed after Linux 4.19"
-       @echo "            Please use \"olddefconfig\" instead, which is an alias."
-
-# We do not expect manual invokcation of "silentoldcofig" (or "syncconfig").
-silentoldconfig: syncconfig
-       @echo "  WARNING: \"silentoldconfig\" has been renamed to \"syncconfig\""
-       @echo "            and is now an internal implementation detail."
-       @echo "            What you want is probably \"oldconfig\"."
-       @echo "            \"silentoldconfig\" will be removed after Linux 4.19"
+PHONY += savedefconfig defconfig
 
 savedefconfig: $(obj)/conf
        $< $(silent) --$@=defconfig $(Kconfig)
index 7b2b37260669e333390f0c995990fe0fb65c9f06..98e0c7a34699e941402085b7ceda086ea29a79c8 100644 (file)
@@ -460,12 +460,6 @@ static struct option long_opts[] = {
        {"randconfig",      no_argument,       NULL, randconfig},
        {"listnewconfig",   no_argument,       NULL, listnewconfig},
        {"olddefconfig",    no_argument,       NULL, olddefconfig},
-       /*
-        * oldnoconfig is an alias of olddefconfig, because people already
-        * are dependent on its behavior(sets new symbols to their default
-        * value but not 'n') with the counter-intuitive name.
-        */
-       {"oldnoconfig",     no_argument,       NULL, olddefconfig},
        {NULL, 0, NULL, 0}
 };
 
@@ -480,7 +474,6 @@ static void conf_usage(const char *progname)
        printf("  --syncconfig            Similar to oldconfig but generates configuration in\n"
               "                          include/{generated/,config/}\n");
        printf("  --olddefconfig          Same as oldconfig but sets new symbols to their default value\n");
-       printf("  --oldnoconfig           An alias of olddefconfig\n");
        printf("  --defconfig <file>      New config with default defined in <file>\n");
        printf("  --savedefconfig <file>  Save the minimal current configuration to <file>\n");
        printf("  --allnoconfig           New config where all options are answered with no\n");
index 67d1314476314590f285a63700c13e047dc17c80..da66e7742282a65eb8457d3427b5bfab7f950415 100755 (executable)
@@ -33,12 +33,15 @@ usage() {
        echo "  -n    use allnoconfig instead of alldefconfig"
        echo "  -r    list redundant entries when merging fragments"
        echo "  -O    dir to put generated output files.  Consider setting \$KCONFIG_CONFIG instead."
+       echo
+       echo "Used prefix: '$CONFIG_PREFIX'. You can redefine it with \$CONFIG_ environment variable."
 }
 
 RUNMAKE=true
 ALLTARGET=alldefconfig
 WARNREDUN=false
 OUTPUT=.
+CONFIG_PREFIX=${CONFIG_-CONFIG_}
 
 while true; do
        case $1 in
@@ -99,7 +102,8 @@ if [ ! -r "$INITFILE" ]; then
 fi
 
 MERGE_LIST=$*
-SED_CONFIG_EXP="s/^\(# \)\{0,1\}\(CONFIG_[a-zA-Z0-9_]*\)[= ].*/\2/p"
+SED_CONFIG_EXP="s/^\(# \)\{0,1\}\(${CONFIG_PREFIX}[a-zA-Z0-9_]*\)[= ].*/\2/p"
+
 TMP_FILE=$(mktemp ./.tmp.config.XXXXXXXXXX)
 
 echo "Using $INITFILE as base"
index e09fe4d7307cd02f85919aefccacc19276e96be9..8963203319ea4005a4176515fce3a65f370dc1e3 100644 (file)
@@ -1742,7 +1742,7 @@ static int ns_rmdir_op(struct inode *dir, struct dentry *dentry)
        if (error)
                return error;
 
-        parent = aa_get_ns(dir->i_private);
+       parent = aa_get_ns(dir->i_private);
        /* rmdir calls the generic securityfs functions to remove files
         * from the apparmor dir. It is up to the apparmor ns locking
         * to avoid races.
index 4285943f7260f36f271b7d7ad773345fb84d97f0..d0afed9ebd0ed9cac3a4803f6f1622959802c9d9 100644 (file)
@@ -496,7 +496,7 @@ static void update_file_ctx(struct aa_file_ctx *fctx, struct aa_label *label,
        /* update caching of label on file_ctx */
        spin_lock(&fctx->lock);
        old = rcu_dereference_protected(fctx->label,
-                                       spin_is_locked(&fctx->lock));
+                                       lockdep_is_held(&fctx->lock));
        l = aa_label_merge(old, label, GFP_ATOMIC);
        if (l) {
                if (l != old) {
index e287b7d0d4bebcf2cc83d9add032fdb1feed54c0..265ae6641a0644e84e40c7f774f4f447a1df3fc2 100644 (file)
@@ -151,6 +151,8 @@ static inline struct aa_label *begin_current_label_crit_section(void)
 {
        struct aa_label *label = aa_current_raw_label();
 
+       might_sleep();
+
        if (label_is_stale(label)) {
                label = aa_get_newest_label(label);
                if (aa_replace_current_label(label) == 0)
index ec7228e857a90d6dabf37de1136c6418d3716e3e..7334ac966d018d9d11220c9ffe30e0863bd6f700 100644 (file)
@@ -83,6 +83,13 @@ struct aa_sk_ctx {
        __e;                                    \
 })
 
+struct aa_secmark {
+       u8 audit;
+       u8 deny;
+       u32 secid;
+       char *label;
+};
+
 extern struct aa_sfs_entry aa_sfs_entry_network[];
 
 void audit_net_cb(struct audit_buffer *ab, void *va);
@@ -103,4 +110,7 @@ int aa_sk_perm(const char *op, u32 request, struct sock *sk);
 int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request,
                      struct socket *sock);
 
+int apparmor_secmark_check(struct aa_label *label, char *op, u32 request,
+                          u32 secid, struct sock *sk);
+
 #endif /* __AA_NET_H */
index ab64c6b5db5aca7aaae1b16a800d48c7499bc498..8e6707c837befae75d6e70accd3c5586ec8edfe0 100644 (file)
@@ -155,6 +155,9 @@ struct aa_profile {
 
        struct aa_rlimit rlimits;
 
+       int secmark_count;
+       struct aa_secmark *secmark;
+
        struct aa_loaddata *rawdata;
        unsigned char *hash;
        char *dirname;
index dee6fa3b6081e1342bfa3e0c4077ea960ca7bfff..fa2062711b63e75a9cf6598c979de04ffd0d3e40 100644 (file)
@@ -22,6 +22,9 @@ struct aa_label;
 /* secid value that will not be allocated */
 #define AA_SECID_INVALID 0
 
+/* secid value that matches any other secid */
+#define AA_SECID_WILDCARD 1
+
 struct aa_label *aa_secid_to_label(u32 secid);
 int apparmor_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
 int apparmor_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
index 974affe505314bcd5ecef87966ba698de1dd853e..76491e7f4177fa94c9d742e91d049a5ebcfe04e1 100644 (file)
@@ -90,10 +90,12 @@ const char *aa_splitn_fqname(const char *fqname, size_t n, const char **ns_name,
        const char *end = fqname + n;
        const char *name = skipn_spaces(fqname, n);
 
-       if (!name)
-               return NULL;
        *ns_name = NULL;
        *ns_len = 0;
+
+       if (!name)
+               return NULL;
+
        if (name[0] == ':') {
                char *split = strnchr(&name[1], end - &name[1], ':');
                *ns_name = skipn_spaces(&name[1], end - &name[1]);
index aa35939443c47799afc720d6119427389dc0bea2..42446a216f3bcfccfe2b0f099f4fa044c1efe6da 100644 (file)
@@ -23,6 +23,8 @@
 #include <linux/sysctl.h>
 #include <linux/audit.h>
 #include <linux/user_namespace.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
 #include <net/sock.h>
 
 #include "include/apparmor.h"
@@ -114,13 +116,13 @@ static int apparmor_ptrace_access_check(struct task_struct *child,
        struct aa_label *tracer, *tracee;
        int error;
 
-       tracer = begin_current_label_crit_section();
+       tracer = __begin_current_label_crit_section();
        tracee = aa_get_task_label(child);
        error = aa_may_ptrace(tracer, tracee,
                        (mode & PTRACE_MODE_READ) ? AA_PTRACE_READ
                                                  : AA_PTRACE_TRACE);
        aa_put_label(tracee);
-       end_current_label_crit_section(tracer);
+       __end_current_label_crit_section(tracer);
 
        return error;
 }
@@ -130,11 +132,11 @@ static int apparmor_ptrace_traceme(struct task_struct *parent)
        struct aa_label *tracer, *tracee;
        int error;
 
-       tracee = begin_current_label_crit_section();
+       tracee = __begin_current_label_crit_section();
        tracer = aa_get_task_label(parent);
        error = aa_may_ptrace(tracer, tracee, AA_PTRACE_TRACE);
        aa_put_label(tracer);
-       end_current_label_crit_section(tracee);
+       __end_current_label_crit_section(tracee);
 
        return error;
 }
@@ -1020,6 +1022,7 @@ static int apparmor_socket_shutdown(struct socket *sock, int how)
        return aa_sock_perm(OP_SHUTDOWN, AA_MAY_SHUTDOWN, sock);
 }
 
+#ifdef CONFIG_NETWORK_SECMARK
 /**
  * apparmor_socket_sock_recv_skb - check perms before associating skb to sk
  *
@@ -1030,8 +1033,15 @@ static int apparmor_socket_shutdown(struct socket *sock, int how)
  */
 static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
-       return 0;
+       struct aa_sk_ctx *ctx = SK_CTX(sk);
+
+       if (!skb->secmark)
+               return 0;
+
+       return apparmor_secmark_check(ctx->label, OP_RECVMSG, AA_MAY_RECEIVE,
+                                     skb->secmark, sk);
 }
+#endif
 
 
 static struct aa_label *sk_peer_label(struct sock *sk)
@@ -1126,6 +1136,20 @@ static void apparmor_sock_graft(struct sock *sk, struct socket *parent)
                ctx->label = aa_get_current_label();
 }
 
+#ifdef CONFIG_NETWORK_SECMARK
+static int apparmor_inet_conn_request(struct sock *sk, struct sk_buff *skb,
+                                     struct request_sock *req)
+{
+       struct aa_sk_ctx *ctx = SK_CTX(sk);
+
+       if (!skb->secmark)
+               return 0;
+
+       return apparmor_secmark_check(ctx->label, OP_CONNECT, AA_MAY_CONNECT,
+                                     skb->secmark, sk);
+}
+#endif
+
 static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
        LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check),
        LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme),
@@ -1177,12 +1201,17 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
        LSM_HOOK_INIT(socket_getsockopt, apparmor_socket_getsockopt),
        LSM_HOOK_INIT(socket_setsockopt, apparmor_socket_setsockopt),
        LSM_HOOK_INIT(socket_shutdown, apparmor_socket_shutdown),
+#ifdef CONFIG_NETWORK_SECMARK
        LSM_HOOK_INIT(socket_sock_rcv_skb, apparmor_socket_sock_rcv_skb),
+#endif
        LSM_HOOK_INIT(socket_getpeersec_stream,
                      apparmor_socket_getpeersec_stream),
        LSM_HOOK_INIT(socket_getpeersec_dgram,
                      apparmor_socket_getpeersec_dgram),
        LSM_HOOK_INIT(sock_graft, apparmor_sock_graft),
+#ifdef CONFIG_NETWORK_SECMARK
+       LSM_HOOK_INIT(inet_conn_request, apparmor_inet_conn_request),
+#endif
 
        LSM_HOOK_INIT(cred_alloc_blank, apparmor_cred_alloc_blank),
        LSM_HOOK_INIT(cred_free, apparmor_cred_free),
@@ -1538,6 +1567,97 @@ static inline int apparmor_init_sysctl(void)
 }
 #endif /* CONFIG_SYSCTL */
 
+#if defined(CONFIG_NETFILTER) && defined(CONFIG_NETWORK_SECMARK)
+static unsigned int apparmor_ip_postroute(void *priv,
+                                         struct sk_buff *skb,
+                                         const struct nf_hook_state *state)
+{
+       struct aa_sk_ctx *ctx;
+       struct sock *sk;
+
+       if (!skb->secmark)
+               return NF_ACCEPT;
+
+       sk = skb_to_full_sk(skb);
+       if (sk == NULL)
+               return NF_ACCEPT;
+
+       ctx = SK_CTX(sk);
+       if (!apparmor_secmark_check(ctx->label, OP_SENDMSG, AA_MAY_SEND,
+                                   skb->secmark, sk))
+               return NF_ACCEPT;
+
+       return NF_DROP_ERR(-ECONNREFUSED);
+
+}
+
+static unsigned int apparmor_ipv4_postroute(void *priv,
+                                           struct sk_buff *skb,
+                                           const struct nf_hook_state *state)
+{
+       return apparmor_ip_postroute(priv, skb, state);
+}
+
+static unsigned int apparmor_ipv6_postroute(void *priv,
+                                           struct sk_buff *skb,
+                                           const struct nf_hook_state *state)
+{
+       return apparmor_ip_postroute(priv, skb, state);
+}
+
+static const struct nf_hook_ops apparmor_nf_ops[] = {
+       {
+               .hook =         apparmor_ipv4_postroute,
+               .pf =           NFPROTO_IPV4,
+               .hooknum =      NF_INET_POST_ROUTING,
+               .priority =     NF_IP_PRI_SELINUX_FIRST,
+       },
+#if IS_ENABLED(CONFIG_IPV6)
+       {
+               .hook =         apparmor_ipv6_postroute,
+               .pf =           NFPROTO_IPV6,
+               .hooknum =      NF_INET_POST_ROUTING,
+               .priority =     NF_IP6_PRI_SELINUX_FIRST,
+       },
+#endif
+};
+
+static int __net_init apparmor_nf_register(struct net *net)
+{
+       int ret;
+
+       ret = nf_register_net_hooks(net, apparmor_nf_ops,
+                                   ARRAY_SIZE(apparmor_nf_ops));
+       return ret;
+}
+
+static void __net_exit apparmor_nf_unregister(struct net *net)
+{
+       nf_unregister_net_hooks(net, apparmor_nf_ops,
+                               ARRAY_SIZE(apparmor_nf_ops));
+}
+
+static struct pernet_operations apparmor_net_ops = {
+       .init = apparmor_nf_register,
+       .exit = apparmor_nf_unregister,
+};
+
+static int __init apparmor_nf_ip_init(void)
+{
+       int err;
+
+       if (!apparmor_enabled)
+               return 0;
+
+       err = register_pernet_subsys(&apparmor_net_ops);
+       if (err)
+               panic("Apparmor: register_pernet_subsys: error %d\n", err);
+
+       return 0;
+}
+__initcall(apparmor_nf_ip_init);
+#endif
+
 static int __init apparmor_init(void)
 {
        int error;
index bb24cfa0a164cbbc0fbebe0b5e85a176bf840069..c07fde444792d8caee5bea5ef99edc62117bdb34 100644 (file)
@@ -18,6 +18,7 @@
 #include "include/label.h"
 #include "include/net.h"
 #include "include/policy.h"
+#include "include/secid.h"
 
 #include "net_names.h"
 
@@ -146,17 +147,20 @@ int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family,
 static int aa_label_sk_perm(struct aa_label *label, const char *op, u32 request,
                            struct sock *sk)
 {
-       struct aa_profile *profile;
-       DEFINE_AUDIT_SK(sa, op, sk);
+       int error = 0;
 
        AA_BUG(!label);
        AA_BUG(!sk);
 
-       if (unconfined(label))
-               return 0;
+       if (!unconfined(label)) {
+               struct aa_profile *profile;
+               DEFINE_AUDIT_SK(sa, op, sk);
 
-       return fn_for_each_confined(label, profile,
-                       aa_profile_af_sk_perm(profile, &sa, request, sk));
+               error = fn_for_each_confined(label, profile,
+                           aa_profile_af_sk_perm(profile, &sa, request, sk));
+       }
+
+       return error;
 }
 
 int aa_sk_perm(const char *op, u32 request, struct sock *sk)
@@ -185,3 +189,70 @@ int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request,
 
        return aa_label_sk_perm(label, op, request, sock->sk);
 }
+
+#ifdef CONFIG_NETWORK_SECMARK
+static int apparmor_secmark_init(struct aa_secmark *secmark)
+{
+       struct aa_label *label;
+
+       if (secmark->label[0] == '*') {
+               secmark->secid = AA_SECID_WILDCARD;
+               return 0;
+       }
+
+       label = aa_label_strn_parse(&root_ns->unconfined->label,
+                                   secmark->label, strlen(secmark->label),
+                                   GFP_ATOMIC, false, false);
+
+       if (IS_ERR(label))
+               return PTR_ERR(label);
+
+       secmark->secid = label->secid;
+
+       return 0;
+}
+
+static int aa_secmark_perm(struct aa_profile *profile, u32 request, u32 secid,
+                          struct common_audit_data *sa, struct sock *sk)
+{
+       int i, ret;
+       struct aa_perms perms = { };
+
+       if (profile->secmark_count == 0)
+               return 0;
+
+       for (i = 0; i < profile->secmark_count; i++) {
+               if (!profile->secmark[i].secid) {
+                       ret = apparmor_secmark_init(&profile->secmark[i]);
+                       if (ret)
+                               return ret;
+               }
+
+               if (profile->secmark[i].secid == secid ||
+                   profile->secmark[i].secid == AA_SECID_WILDCARD) {
+                       if (profile->secmark[i].deny)
+                               perms.deny = ALL_PERMS_MASK;
+                       else
+                               perms.allow = ALL_PERMS_MASK;
+
+                       if (profile->secmark[i].audit)
+                               perms.audit = ALL_PERMS_MASK;
+               }
+       }
+
+       aa_apply_modes_to_perms(profile, &perms);
+
+       return aa_check_perms(profile, &perms, request, sa, audit_net_cb);
+}
+
+int apparmor_secmark_check(struct aa_label *label, char *op, u32 request,
+                          u32 secid, struct sock *sk)
+{
+       struct aa_profile *profile;
+       DEFINE_AUDIT_SK(sa, op, sk);
+
+       return fn_for_each_confined(label, profile,
+                                   aa_secmark_perm(profile, request, secid,
+                                                   &sa, sk));
+}
+#endif
index 1590e2de4e841c131ac472fa5e9c312d448b0866..df9c5890a87891dc1707fc51171148d01d952ea5 100644 (file)
@@ -231,6 +231,9 @@ void aa_free_profile(struct aa_profile *profile)
        for (i = 0; i < profile->xattr_count; i++)
                kzfree(profile->xattrs[i]);
        kzfree(profile->xattrs);
+       for (i = 0; i < profile->secmark_count; i++)
+               kzfree(profile->secmark[i].label);
+       kzfree(profile->secmark);
        kzfree(profile->dirname);
        aa_put_dfa(profile->xmatch);
        aa_put_dfa(profile->policy.dfa);
index 21cb384d712a2865d1da6f8d12625c250993b28e..379682e2a8d5db7e793fd1ebdd3d93f569bb3b4c 100644 (file)
@@ -292,6 +292,19 @@ fail:
        return 0;
 }
 
+static bool unpack_u8(struct aa_ext *e, u8 *data, const char *name)
+{
+       if (unpack_nameX(e, AA_U8, name)) {
+               if (!inbounds(e, sizeof(u8)))
+                       return 0;
+               if (data)
+                       *data = get_unaligned((u8 *)e->pos);
+               e->pos += sizeof(u8);
+               return 1;
+       }
+       return 0;
+}
+
 static bool unpack_u32(struct aa_ext *e, u32 *data, const char *name)
 {
        if (unpack_nameX(e, AA_U32, name)) {
@@ -529,6 +542,49 @@ fail:
        return 0;
 }
 
+static bool unpack_secmark(struct aa_ext *e, struct aa_profile *profile)
+{
+       void *pos = e->pos;
+       int i, size;
+
+       if (unpack_nameX(e, AA_STRUCT, "secmark")) {
+               size = unpack_array(e, NULL);
+
+               profile->secmark = kcalloc(size, sizeof(struct aa_secmark),
+                                          GFP_KERNEL);
+               if (!profile->secmark)
+                       goto fail;
+
+               profile->secmark_count = size;
+
+               for (i = 0; i < size; i++) {
+                       if (!unpack_u8(e, &profile->secmark[i].audit, NULL))
+                               goto fail;
+                       if (!unpack_u8(e, &profile->secmark[i].deny, NULL))
+                               goto fail;
+                       if (!unpack_strdup(e, &profile->secmark[i].label, NULL))
+                               goto fail;
+               }
+               if (!unpack_nameX(e, AA_ARRAYEND, NULL))
+                       goto fail;
+               if (!unpack_nameX(e, AA_STRUCTEND, NULL))
+                       goto fail;
+       }
+
+       return 1;
+
+fail:
+       if (profile->secmark) {
+               for (i = 0; i < size; i++)
+                       kfree(profile->secmark[i].label);
+               kfree(profile->secmark);
+               profile->secmark_count = 0;
+       }
+
+       e->pos = pos;
+       return 0;
+}
+
 static bool unpack_rlimits(struct aa_ext *e, struct aa_profile *profile)
 {
        void *pos = e->pos;
@@ -727,6 +783,11 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
                goto fail;
        }
 
+       if (!unpack_secmark(e, profile)) {
+               info = "failed to unpack profile secmark rules";
+               goto fail;
+       }
+
        if (unpack_nameX(e, AA_STRUCT, "policydb")) {
                /* generic policy dfa - optional and may be NULL */
                info = "failed to unpack policydb";
index 4ccec1bcf6f54f261542a546458cab77c6af9e52..05373d9a3d6af10643050e7a825c04b3aef30c96 100644 (file)
@@ -32,8 +32,7 @@
  * secids - do not pin labels with a refcount. They rely on the label
  * properly updating/freeing them
  */
-
-#define AA_FIRST_SECID 1
+#define AA_FIRST_SECID 2
 
 static DEFINE_IDR(aa_secids);
 static DEFINE_SPINLOCK(secid_lock);
index ef1581b337a3dc67dedd2e43cec94d28e5188cbd..9cef54064f6084a3694a33eae143f960aebba979 100644 (file)
@@ -22,6 +22,7 @@ obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_SYSCTL) += sysctl.o
 obj-$(CONFIG_PERSISTENT_KEYRINGS) += persistent.o
 obj-$(CONFIG_KEY_DH_OPERATIONS) += dh.o
+obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += keyctl_pkey.o
 
 #
 # Key types
index e87c89c0177c1559e9cc5e8874b4231de1dc3437..9482df601dc33de3b183b02b66b5964e884a5ba8 100644 (file)
@@ -141,6 +141,24 @@ COMPAT_SYSCALL_DEFINE5(keyctl, u32, option,
                return keyctl_restrict_keyring(arg2, compat_ptr(arg3),
                                               compat_ptr(arg4));
 
+       case KEYCTL_PKEY_QUERY:
+               if (arg3 != 0)
+                       return -EINVAL;
+               return keyctl_pkey_query(arg2,
+                                        compat_ptr(arg4),
+                                        compat_ptr(arg5));
+
+       case KEYCTL_PKEY_ENCRYPT:
+       case KEYCTL_PKEY_DECRYPT:
+       case KEYCTL_PKEY_SIGN:
+               return keyctl_pkey_e_d_s(option,
+                                        compat_ptr(arg2), compat_ptr(arg3),
+                                        compat_ptr(arg4), compat_ptr(arg5));
+
+       case KEYCTL_PKEY_VERIFY:
+               return keyctl_pkey_verify(compat_ptr(arg2), compat_ptr(arg3),
+                                         compat_ptr(arg4), compat_ptr(arg5));
+
        default:
                return -EOPNOTSUPP;
        }
index 9f8208dc0e55829c7e5821b3f123148e5f7dc801..74cb0ff42fedbca403a282058be7219109b1b3c6 100644 (file)
@@ -298,6 +298,45 @@ static inline long compat_keyctl_dh_compute(
 #endif
 #endif
 
+#ifdef CONFIG_ASYMMETRIC_KEY_TYPE
+extern long keyctl_pkey_query(key_serial_t,
+                             const char __user *,
+                             struct keyctl_pkey_query __user *);
+
+extern long keyctl_pkey_verify(const struct keyctl_pkey_params __user *,
+                              const char __user *,
+                              const void __user *, const void __user *);
+
+extern long keyctl_pkey_e_d_s(int,
+                             const struct keyctl_pkey_params __user *,
+                             const char __user *,
+                             const void __user *, void __user *);
+#else
+static inline long keyctl_pkey_query(key_serial_t id,
+                                    const char __user *_info,
+                                    struct keyctl_pkey_query __user *_res)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline long keyctl_pkey_verify(const struct keyctl_pkey_params __user *params,
+                                     const char __user *_info,
+                                     const void __user *_in,
+                                     const void __user *_in2)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline long keyctl_pkey_e_d_s(int op,
+                                    const struct keyctl_pkey_params __user *params,
+                                    const char __user *_info,
+                                    const void __user *_in,
+                                    void __user *_out)
+{
+       return -EOPNOTSUPP;
+}
+#endif
+
 /*
  * Debugging key validation
  */
index 1ffe60bb2845f97638157b01ed7fcc4f45714312..18619690ce77a926307796ec3be1626a1023db4c 100644 (file)
@@ -1747,6 +1747,30 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
                                               (const char __user *) arg3,
                                               (const char __user *) arg4);
 
+       case KEYCTL_PKEY_QUERY:
+               if (arg3 != 0)
+                       return -EINVAL;
+               return keyctl_pkey_query((key_serial_t)arg2,
+                                        (const char __user *)arg4,
+                                        (struct keyctl_pkey_query *)arg5);
+
+       case KEYCTL_PKEY_ENCRYPT:
+       case KEYCTL_PKEY_DECRYPT:
+       case KEYCTL_PKEY_SIGN:
+               return keyctl_pkey_e_d_s(
+                       option,
+                       (const struct keyctl_pkey_params __user *)arg2,
+                       (const char __user *)arg3,
+                       (const void __user *)arg4,
+                       (void __user *)arg5);
+
+       case KEYCTL_PKEY_VERIFY:
+               return keyctl_pkey_verify(
+                       (const struct keyctl_pkey_params __user *)arg2,
+                       (const char __user *)arg3,
+                       (const void __user *)arg4,
+                       (const void __user *)arg5);
+
        default:
                return -EOPNOTSUPP;
        }
diff --git a/security/keys/keyctl_pkey.c b/security/keys/keyctl_pkey.c
new file mode 100644 (file)
index 0000000..7839788
--- /dev/null
@@ -0,0 +1,323 @@
+/* Public-key operation keyctls
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/key.h>
+#include <linux/keyctl.h>
+#include <linux/parser.h>
+#include <linux/uaccess.h>
+#include <keys/user-type.h>
+#include "internal.h"
+
+static void keyctl_pkey_params_free(struct kernel_pkey_params *params)
+{
+       kfree(params->info);
+       key_put(params->key);
+}
+
+enum {
+       Opt_err = -1,
+       Opt_enc,                /* "enc=<encoding>" eg. "enc=oaep" */
+       Opt_hash,               /* "hash=<digest-name>" eg. "hash=sha1" */
+};
+
+static const match_table_t param_keys = {
+       { Opt_enc,      "enc=%s" },
+       { Opt_hash,     "hash=%s" },
+       { Opt_err,      NULL }
+};
+
+/*
+ * Parse the information string which consists of key=val pairs.
+ */
+static int keyctl_pkey_params_parse(struct kernel_pkey_params *params)
+{
+       unsigned long token_mask = 0;
+       substring_t args[MAX_OPT_ARGS];
+       char *c = params->info, *p, *q;
+       int token;
+
+       while ((p = strsep(&c, " \t"))) {
+               if (*p == '\0' || *p == ' ' || *p == '\t')
+                       continue;
+               token = match_token(p, param_keys, args);
+               if (__test_and_set_bit(token, &token_mask))
+                       return -EINVAL;
+               q = args[0].from;
+               if (!q[0])
+                       return -EINVAL;
+
+               switch (token) {
+               case Opt_enc:
+                       params->encoding = q;
+                       break;
+
+               case Opt_hash:
+                       params->hash_algo = q;
+                       break;
+
+               default:
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Interpret parameters.  Callers must always call the free function
+ * on params, even if an error is returned.
+ */
+static int keyctl_pkey_params_get(key_serial_t id,
+                                 const char __user *_info,
+                                 struct kernel_pkey_params *params)
+{
+       key_ref_t key_ref;
+       void *p;
+       int ret;
+
+       memset(params, 0, sizeof(*params));
+       params->encoding = "raw";
+
+       p = strndup_user(_info, PAGE_SIZE);
+       if (IS_ERR(p))
+               return PTR_ERR(p);
+       params->info = p;
+
+       ret = keyctl_pkey_params_parse(params);
+       if (ret < 0)
+               return ret;
+
+       key_ref = lookup_user_key(id, 0, KEY_NEED_SEARCH);
+       if (IS_ERR(key_ref))
+               return PTR_ERR(key_ref);
+       params->key = key_ref_to_ptr(key_ref);
+
+       if (!params->key->type->asym_query)
+               return -EOPNOTSUPP;
+
+       return 0;
+}
+
+/*
+ * Get parameters from userspace.  Callers must always call the free function
+ * on params, even if an error is returned.
+ */
+static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_params,
+                                   const char __user *_info,
+                                   int op,
+                                   struct kernel_pkey_params *params)
+{
+       struct keyctl_pkey_params uparams;
+       struct kernel_pkey_query info;
+       int ret;
+
+       memset(params, 0, sizeof(*params));
+       params->encoding = "raw";
+
+       if (copy_from_user(&uparams, _params, sizeof(uparams)) != 0)
+               return -EFAULT;
+
+       ret = keyctl_pkey_params_get(uparams.key_id, _info, params);
+       if (ret < 0)
+               return ret;
+
+       ret = params->key->type->asym_query(params, &info);
+       if (ret < 0)
+               return ret;
+
+       switch (op) {
+       case KEYCTL_PKEY_ENCRYPT:
+       case KEYCTL_PKEY_DECRYPT:
+               if (uparams.in_len  > info.max_enc_size ||
+                   uparams.out_len > info.max_dec_size)
+                       return -EINVAL;
+               break;
+       case KEYCTL_PKEY_SIGN:
+       case KEYCTL_PKEY_VERIFY:
+               if (uparams.in_len  > info.max_sig_size ||
+                   uparams.out_len > info.max_data_size)
+                       return -EINVAL;
+               break;
+       default:
+               BUG();
+       }
+
+       params->in_len  = uparams.in_len;
+       params->out_len = uparams.out_len;
+       return 0;
+}
+
+/*
+ * Query information about an asymmetric key.
+ */
+long keyctl_pkey_query(key_serial_t id,
+                      const char __user *_info,
+                      struct keyctl_pkey_query __user *_res)
+{
+       struct kernel_pkey_params params;
+       struct kernel_pkey_query res;
+       long ret;
+
+       memset(&params, 0, sizeof(params));
+
+       ret = keyctl_pkey_params_get(id, _info, &params);
+       if (ret < 0)
+               goto error;
+
+       ret = params.key->type->asym_query(&params, &res);
+       if (ret < 0)
+               goto error;
+
+       ret = -EFAULT;
+       if (copy_to_user(_res, &res, sizeof(res)) == 0 &&
+           clear_user(_res->__spare, sizeof(_res->__spare)) == 0)
+               ret = 0;
+
+error:
+       keyctl_pkey_params_free(&params);
+       return ret;
+}
+
+/*
+ * Encrypt/decrypt/sign
+ *
+ * Encrypt data, decrypt data or sign data using a public key.
+ *
+ * _info is a string of supplementary information in key=val format.  For
+ * instance, it might contain:
+ *
+ *     "enc=pkcs1 hash=sha256"
+ *
+ * where enc= specifies the encoding and hash= selects the OID to go in that
+ * particular encoding if required.  If enc= isn't supplied, it's assumed that
+ * the caller is supplying raw values.
+ *
+ * If successful, the amount of data written into the output buffer is
+ * returned.
+ */
+long keyctl_pkey_e_d_s(int op,
+                      const struct keyctl_pkey_params __user *_params,
+                      const char __user *_info,
+                      const void __user *_in,
+                      void __user *_out)
+{
+       struct kernel_pkey_params params;
+       void *in, *out;
+       long ret;
+
+       ret = keyctl_pkey_params_get_2(_params, _info, op, &params);
+       if (ret < 0)
+               goto error_params;
+
+       ret = -EOPNOTSUPP;
+       if (!params.key->type->asym_eds_op)
+               goto error_params;
+
+       switch (op) {
+       case KEYCTL_PKEY_ENCRYPT:
+               params.op = kernel_pkey_encrypt;
+               break;
+       case KEYCTL_PKEY_DECRYPT:
+               params.op = kernel_pkey_decrypt;
+               break;
+       case KEYCTL_PKEY_SIGN:
+               params.op = kernel_pkey_sign;
+               break;
+       default:
+               BUG();
+       }
+
+       in = memdup_user(_in, params.in_len);
+       if (IS_ERR(in)) {
+               ret = PTR_ERR(in);
+               goto error_params;
+       }
+
+       ret = -ENOMEM;
+       out = kmalloc(params.out_len, GFP_KERNEL);
+       if (!out)
+               goto error_in;
+
+       ret = params.key->type->asym_eds_op(&params, in, out);
+       if (ret < 0)
+               goto error_out;
+
+       if (copy_to_user(_out, out, ret) != 0)
+               ret = -EFAULT;
+
+error_out:
+       kfree(out);
+error_in:
+       kfree(in);
+error_params:
+       keyctl_pkey_params_free(&params);
+       return ret;
+}
+
+/*
+ * Verify a signature.
+ *
+ * Verify a public key signature using the given key, or if not given, search
+ * for a matching key.
+ *
+ * _info is a string of supplementary information in key=val format.  For
+ * instance, it might contain:
+ *
+ *     "enc=pkcs1 hash=sha256"
+ *
+ * where enc= specifies the signature blob encoding and hash= selects the OID
+ * to go in that particular encoding.  If enc= isn't supplied, it's assumed
+ * that the caller is supplying raw values.
+ *
+ * If successful, 0 is returned.
+ */
+long keyctl_pkey_verify(const struct keyctl_pkey_params __user *_params,
+                       const char __user *_info,
+                       const void __user *_in,
+                       const void __user *_in2)
+{
+       struct kernel_pkey_params params;
+       void *in, *in2;
+       long ret;
+
+       ret = keyctl_pkey_params_get_2(_params, _info, KEYCTL_PKEY_VERIFY,
+                                      &params);
+       if (ret < 0)
+               goto error_params;
+
+       ret = -EOPNOTSUPP;
+       if (!params.key->type->asym_verify_signature)
+               goto error_params;
+
+       in = memdup_user(_in, params.in_len);
+       if (IS_ERR(in)) {
+               ret = PTR_ERR(in);
+               goto error_params;
+       }
+
+       in2 = memdup_user(_in2, params.in2_len);
+       if (IS_ERR(in2)) {
+               ret = PTR_ERR(in2);
+               goto error_in;
+       }
+
+       params.op = kernel_pkey_verify;
+       ret = params.key->type->asym_verify_signature(&params, in, in2);
+
+       kfree(in2);
+error_in:
+       kfree(in);
+error_params:
+       keyctl_pkey_params_free(&params);
+       return ret;
+}
index b69d3b1777c25d1d3f9cc5af3514352ed0220fcc..ff6789365a12fb15ed91e160ba2936262ec5c29c 100644 (file)
@@ -30,7 +30,7 @@
 #include <linux/tpm.h>
 #include <linux/tpm_command.h>
 
-#include "trusted.h"
+#include <keys/trusted.h>
 
 static const char hmac_alg[] = "hmac(sha1)";
 static const char hash_alg[] = "sha1";
@@ -121,7 +121,7 @@ out:
 /*
  * calculate authorization info fields to send to TPM
  */
-static int TSS_authhmac(unsigned char *digest, const unsigned char *key,
+int TSS_authhmac(unsigned char *digest, const unsigned char *key,
                        unsigned int keylen, unsigned char *h1,
                        unsigned char *h2, unsigned char h3, ...)
 {
@@ -168,11 +168,12 @@ out:
        kzfree(sdesc);
        return ret;
 }
+EXPORT_SYMBOL_GPL(TSS_authhmac);
 
 /*
  * verify the AUTH1_COMMAND (Seal) result from TPM
  */
-static int TSS_checkhmac1(unsigned char *buffer,
+int TSS_checkhmac1(unsigned char *buffer,
                          const uint32_t command,
                          const unsigned char *ononce,
                          const unsigned char *key,
@@ -249,6 +250,7 @@ out:
        kzfree(sdesc);
        return ret;
 }
+EXPORT_SYMBOL_GPL(TSS_checkhmac1);
 
 /*
  * verify the AUTH2_COMMAND (unseal) result from TPM
@@ -355,7 +357,7 @@ out:
  * For key specific tpm requests, we will generate and send our
  * own TPM command packets using the drivers send function.
  */
-static int trusted_tpm_send(unsigned char *cmd, size_t buflen)
+int trusted_tpm_send(unsigned char *cmd, size_t buflen)
 {
        int rc;
 
@@ -367,6 +369,7 @@ static int trusted_tpm_send(unsigned char *cmd, size_t buflen)
                rc = -EPERM;
        return rc;
 }
+EXPORT_SYMBOL_GPL(trusted_tpm_send);
 
 /*
  * Lock a trusted key, by extending a selected PCR.
@@ -425,7 +428,7 @@ static int osap(struct tpm_buf *tb, struct osapsess *s,
 /*
  * Create an object independent authorisation protocol (oiap) session
  */
-static int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce)
+int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce)
 {
        int ret;
 
@@ -442,6 +445,7 @@ static int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce)
               TPM_NONCE_SIZE);
        return 0;
 }
+EXPORT_SYMBOL_GPL(oiap);
 
 struct tpm_digests {
        unsigned char encauth[SHA1_DIGEST_SIZE];
diff --git a/security/keys/trusted.h b/security/keys/trusted.h
deleted file mode 100644 (file)
index 8d5fe9e..0000000
+++ /dev/null
@@ -1,124 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __TRUSTED_KEY_H
-#define __TRUSTED_KEY_H
-
-/* implementation specific TPM constants */
-#define MAX_BUF_SIZE                   512
-#define TPM_GETRANDOM_SIZE             14
-#define TPM_OSAP_SIZE                  36
-#define TPM_OIAP_SIZE                  10
-#define TPM_SEAL_SIZE                  87
-#define TPM_UNSEAL_SIZE                        104
-#define TPM_SIZE_OFFSET                        2
-#define TPM_RETURN_OFFSET              6
-#define TPM_DATA_OFFSET                        10
-
-#define LOAD32(buffer, offset) (ntohl(*(uint32_t *)&buffer[offset]))
-#define LOAD32N(buffer, offset)        (*(uint32_t *)&buffer[offset])
-#define LOAD16(buffer, offset) (ntohs(*(uint16_t *)&buffer[offset]))
-
-struct tpm_buf {
-       int len;
-       unsigned char data[MAX_BUF_SIZE];
-};
-
-#define INIT_BUF(tb) (tb->len = 0)
-
-struct osapsess {
-       uint32_t handle;
-       unsigned char secret[SHA1_DIGEST_SIZE];
-       unsigned char enonce[TPM_NONCE_SIZE];
-};
-
-/* discrete values, but have to store in uint16_t for TPM use */
-enum {
-       SEAL_keytype = 1,
-       SRK_keytype = 4
-};
-
-#define TPM_DEBUG 0
-
-#if TPM_DEBUG
-static inline void dump_options(struct trusted_key_options *o)
-{
-       pr_info("trusted_key: sealing key type %d\n", o->keytype);
-       pr_info("trusted_key: sealing key handle %0X\n", o->keyhandle);
-       pr_info("trusted_key: pcrlock %d\n", o->pcrlock);
-       pr_info("trusted_key: pcrinfo %d\n", o->pcrinfo_len);
-       print_hex_dump(KERN_INFO, "pcrinfo ", DUMP_PREFIX_NONE,
-                      16, 1, o->pcrinfo, o->pcrinfo_len, 0);
-}
-
-static inline void dump_payload(struct trusted_key_payload *p)
-{
-       pr_info("trusted_key: key_len %d\n", p->key_len);
-       print_hex_dump(KERN_INFO, "key ", DUMP_PREFIX_NONE,
-                      16, 1, p->key, p->key_len, 0);
-       pr_info("trusted_key: bloblen %d\n", p->blob_len);
-       print_hex_dump(KERN_INFO, "blob ", DUMP_PREFIX_NONE,
-                      16, 1, p->blob, p->blob_len, 0);
-       pr_info("trusted_key: migratable %d\n", p->migratable);
-}
-
-static inline void dump_sess(struct osapsess *s)
-{
-       print_hex_dump(KERN_INFO, "trusted-key: handle ", DUMP_PREFIX_NONE,
-                      16, 1, &s->handle, 4, 0);
-       pr_info("trusted-key: secret:\n");
-       print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE,
-                      16, 1, &s->secret, SHA1_DIGEST_SIZE, 0);
-       pr_info("trusted-key: enonce:\n");
-       print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE,
-                      16, 1, &s->enonce, SHA1_DIGEST_SIZE, 0);
-}
-
-static inline void dump_tpm_buf(unsigned char *buf)
-{
-       int len;
-
-       pr_info("\ntrusted-key: tpm buffer\n");
-       len = LOAD32(buf, TPM_SIZE_OFFSET);
-       print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, buf, len, 0);
-}
-#else
-static inline void dump_options(struct trusted_key_options *o)
-{
-}
-
-static inline void dump_payload(struct trusted_key_payload *p)
-{
-}
-
-static inline void dump_sess(struct osapsess *s)
-{
-}
-
-static inline void dump_tpm_buf(unsigned char *buf)
-{
-}
-#endif
-
-static inline void store8(struct tpm_buf *buf, const unsigned char value)
-{
-       buf->data[buf->len++] = value;
-}
-
-static inline void store16(struct tpm_buf *buf, const uint16_t value)
-{
-       *(uint16_t *) & buf->data[buf->len] = htons(value);
-       buf->len += sizeof value;
-}
-
-static inline void store32(struct tpm_buf *buf, const uint32_t value)
-{
-       *(uint32_t *) & buf->data[buf->len] = htonl(value);
-       buf->len += sizeof value;
-}
-
-static inline void storebytes(struct tpm_buf *buf, const unsigned char *in,
-                             const int len)
-{
-       memcpy(buf->data + buf->len, in, len);
-       buf->len += len;
-}
-#endif
index fcd965f1d69e820de7824de01365d95e73eb44a3..9be76c808fccf09ef8a8785152657510bcef1f1e 100644 (file)
@@ -146,53 +146,22 @@ static int apply_constraint_to_size(struct snd_pcm_hw_params *params,
        struct snd_interval *s = hw_param_interval(params, rule->var);
        const struct snd_interval *r =
                hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_RATE);
-       struct snd_interval t = {
-               .min = s->min, .max = s->max, .integer = 1,
-       };
+       struct snd_interval t = {0};
+       unsigned int step = 0;
        int i;
 
        for (i = 0; i < CIP_SFC_COUNT; ++i) {
-               unsigned int rate = amdtp_rate_table[i];
-               unsigned int step = amdtp_syt_intervals[i];
-
-               if (!snd_interval_test(r, rate))
-                       continue;
-
-               t.min = roundup(t.min, step);
-               t.max = rounddown(t.max, step);
+               if (snd_interval_test(r, amdtp_rate_table[i]))
+                       step = max(step, amdtp_syt_intervals[i]);
        }
 
-       if (snd_interval_checkempty(&t))
-               return -EINVAL;
+       t.min = roundup(s->min, step);
+       t.max = rounddown(s->max, step);
+       t.integer = 1;
 
        return snd_interval_refine(s, &t);
 }
 
-static int apply_constraint_to_rate(struct snd_pcm_hw_params *params,
-                                   struct snd_pcm_hw_rule *rule)
-{
-       struct snd_interval *r =
-                       hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE);
-       const struct snd_interval *s = hw_param_interval_c(params, rule->deps[0]);
-       struct snd_interval t = {
-               .min = UINT_MAX, .max = 0, .integer = 1,
-       };
-       int i;
-
-       for (i = 0; i < CIP_SFC_COUNT; ++i) {
-               unsigned int step = amdtp_syt_intervals[i];
-               unsigned int rate = amdtp_rate_table[i];
-
-               if (s->min % step || s->max % step)
-                       continue;
-
-               t.min = min(t.min, rate);
-               t.max = max(t.max, rate);
-       }
-
-       return snd_interval_refine(r, &t);
-}
-
 /**
  * amdtp_stream_add_pcm_hw_constraints - add hw constraints for PCM substream
  * @s:         the AMDTP stream, which must be initialized.
@@ -250,24 +219,16 @@ int amdtp_stream_add_pcm_hw_constraints(struct amdtp_stream *s,
         */
        err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
                                  apply_constraint_to_size, NULL,
+                                 SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
                                  SNDRV_PCM_HW_PARAM_RATE, -1);
        if (err < 0)
                goto end;
-       err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE,
-                                 apply_constraint_to_rate, NULL,
-                                 SNDRV_PCM_HW_PARAM_PERIOD_SIZE, -1);
-       if (err < 0)
-               goto end;
        err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_SIZE,
                                  apply_constraint_to_size, NULL,
+                                 SNDRV_PCM_HW_PARAM_BUFFER_SIZE,
                                  SNDRV_PCM_HW_PARAM_RATE, -1);
        if (err < 0)
                goto end;
-       err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE,
-                                 apply_constraint_to_rate, NULL,
-                                 SNDRV_PCM_HW_PARAM_BUFFER_SIZE, -1);
-       if (err < 0)
-               goto end;
 end:
        return err;
 }
index 0f6dbcffe711d62cfcba5fa7e57194f7f1a70e71..ed50b222d36ea0d4dd7ddd4440ca8f62ce0e7d8a 100644 (file)
@@ -240,8 +240,8 @@ static void dice_remove(struct fw_unit *unit)
        cancel_delayed_work_sync(&dice->dwork);
 
        if (dice->registered) {
-               /* No need to wait for releasing card object in this context. */
-               snd_card_free_when_closed(dice->card);
+               // Block till all of ALSA character devices are released.
+               snd_card_free(dice->card);
        }
 
        mutex_destroy(&dice->mutex);
index 04402c14cb2392276bb23308244c626b6539fc4e..9847b669cf3cf0fbc698e89fd78774ace013d619 100644 (file)
 #define SPI_PL_BIT_R_R         (2<<7)  /* right channel = right */
 #define SPI_PL_BIT_R_C         (3<<7)  /* right channel = (L+R)/2 */
 #define SPI_IZD_REG            2
-#define SPI_IZD_BIT            (1<<4)  /* infinite zero detect */
+#define SPI_IZD_BIT            (0<<4)  /* infinite zero detect */
 
 #define SPI_FMT_REG            3
 #define SPI_FMT_BIT_RJ         (0<<0)  /* right justified mode */
index 97f49b751e6eb3583c948e7e35c0ddc8860cc818..568575b72f2f7269c727a202f6544b6f51a57d03 100644 (file)
@@ -58,8 +58,8 @@ static void hda_fixup_thinkpad_acpi(struct hda_codec *codec,
                        removefunc = false;
                }
                if (led_set_func(TPACPI_LED_MICMUTE, false) >= 0 &&
-                   snd_hda_gen_add_micmute_led(codec,
-                                               update_tpacpi_micmute) > 0)
+                   !snd_hda_gen_add_micmute_led(codec,
+                                                update_tpacpi_micmute))
                        removefunc = false;
        }
 
index 5072cbd15c82955ce3fcf1a3ca828103b882248d..dae1584cf017f6aa311a5b78c3311b0bf55c2b18 100644 (file)
@@ -16,5 +16,6 @@
  */
 
 #define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_NEW_STAT
 
 #include <asm-generic/unistd.h>
index 1b32b56a03d34ce2a5f0b7f79c621f87d8c89dbf..8c876c166ef27b2c6fa754781fdbb103f2addc54 100644 (file)
@@ -634,6 +634,7 @@ struct kvm_ppc_cpu_char {
 
 #define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
 #define KVM_REG_PPC_ONLINE     (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
+#define KVM_REG_PPC_PTCR       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
index 9a50f02b98946eb49df6cb5f407b1a4a04b89e6c..16511d97e8dc037c8c0b9a60b09a0adf409e6d20 100644 (file)
@@ -160,6 +160,8 @@ struct kvm_s390_vm_cpu_subfunc {
 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW       1
 #define KVM_S390_VM_CRYPTO_DISABLE_AES_KW      2
 #define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW      3
+#define KVM_S390_VM_CRYPTO_ENABLE_APIE         4
+#define KVM_S390_VM_CRYPTO_DISABLE_APIE                5
 
 /* kvm attributes for migration mode */
 #define KVM_S390_VM_MIGRATION_STOP     0
index 8a6eff9c27f3faf349b2b8ba192e55fe0f808f57..dabfcf7c3941aa90a92a91ee37f1164447c71655 100644 (file)
@@ -300,10 +300,7 @@ struct kvm_vcpu_events {
                __u8 injected;
                __u8 nr;
                __u8 has_error_code;
-               union {
-                       __u8 pad;
-                       __u8 pending;
-               };
+               __u8 pending;
                __u32 error_code;
        } exception;
        struct {
@@ -387,6 +384,7 @@ struct kvm_sync_regs {
 
 #define KVM_STATE_NESTED_GUEST_MODE    0x00000001
 #define KVM_STATE_NESTED_RUN_PENDING   0x00000002
+#define KVM_STATE_NESTED_EVMCS         0x00000004
 
 #define KVM_STATE_NESTED_SMM_GUEST_MODE        0x00000001
 #define KVM_STATE_NESTED_SMM_VMXON     0x00000002
index df4bedb9b01c281b7bf15048fef3063a35ede51c..538546edbfbd2bd1cfca431aa95864f018fcc7ee 100644 (file)
@@ -242,10 +242,12 @@ __SYSCALL(__NR_tee, sys_tee)
 /* fs/stat.c */
 #define __NR_readlinkat 78
 __SYSCALL(__NR_readlinkat, sys_readlinkat)
+#if defined(__ARCH_WANT_NEW_STAT) || defined(__ARCH_WANT_STAT64)
 #define __NR3264_fstatat 79
 __SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat)
 #define __NR3264_fstat 80
 __SC_3264(__NR3264_fstat, sys_fstat64, sys_newfstat)
+#endif
 
 /* fs/sync.c */
 #define __NR_sync 81
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
new file mode 100644 (file)
index 0000000..a441ea1
--- /dev/null
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_FS_H
+#define _UAPI_LINUX_FS_H
+
+/*
+ * This file has definitions for some important file table structures
+ * and constants and structures used by various generic file system
+ * ioctl's.  Please do not make any changes in this file before
+ * sending patches for review to linux-fsdevel@vger.kernel.org and
+ * linux-api@vger.kernel.org.
+ */
+
+#include <linux/limits.h>
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
+ * the file limit at runtime and only root can increase the per-process
+ * nr_file rlimit, so it's safe to set up a ridiculously high absolute
+ * upper limit on files-per-process.
+ *
+ * Some programs (notably those using select()) may have to be 
+ * recompiled to take full advantage of the new limits..  
+ */
+
+/* Fixed constants first: */
+#undef NR_OPEN
+#define INR_OPEN_CUR 1024      /* Initial setting for nfile rlimits */
+#define INR_OPEN_MAX 4096      /* Hard limit for nfile rlimits */
+
+#define BLOCK_SIZE_BITS 10
+#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
+
+#define SEEK_SET       0       /* seek relative to beginning of file */
+#define SEEK_CUR       1       /* seek relative to current file position */
+#define SEEK_END       2       /* seek relative to end of file */
+#define SEEK_DATA      3       /* seek to the next data */
+#define SEEK_HOLE      4       /* seek to the next hole */
+#define SEEK_MAX       SEEK_HOLE
+
+#define RENAME_NOREPLACE       (1 << 0)        /* Don't overwrite target */
+#define RENAME_EXCHANGE                (1 << 1)        /* Exchange source and dest */
+#define RENAME_WHITEOUT                (1 << 2)        /* Whiteout source */
+
+struct file_clone_range {
+       __s64 src_fd;
+       __u64 src_offset;
+       __u64 src_length;
+       __u64 dest_offset;
+};
+
+struct fstrim_range {
+       __u64 start;
+       __u64 len;
+       __u64 minlen;
+};
+
+/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
+#define FILE_DEDUPE_RANGE_SAME         0
+#define FILE_DEDUPE_RANGE_DIFFERS      1
+
+/* from struct btrfs_ioctl_file_extent_same_info */
+struct file_dedupe_range_info {
+       __s64 dest_fd;          /* in - destination file */
+       __u64 dest_offset;      /* in - start of extent in destination */
+       __u64 bytes_deduped;    /* out - total # of bytes we were able
+                                * to dedupe from this file. */
+       /* status of this dedupe operation:
+        * < 0 for error
+        * == FILE_DEDUPE_RANGE_SAME if dedupe succeeds
+        * == FILE_DEDUPE_RANGE_DIFFERS if data differs
+        */
+       __s32 status;           /* out - see above description */
+       __u32 reserved;         /* must be zero */
+};
+
+/* from struct btrfs_ioctl_file_extent_same_args */
+struct file_dedupe_range {
+       __u64 src_offset;       /* in - start of extent in source */
+       __u64 src_length;       /* in - length of extent */
+       __u16 dest_count;       /* in - total elements in info array */
+       __u16 reserved1;        /* must be zero */
+       __u32 reserved2;        /* must be zero */
+       struct file_dedupe_range_info info[0];
+};
+
+/* And dynamically-tunable limits and defaults: */
+struct files_stat_struct {
+       unsigned long nr_files;         /* read only */
+       unsigned long nr_free_files;    /* read only */
+       unsigned long max_files;                /* tunable */
+};
+
+struct inodes_stat_t {
+       long nr_inodes;
+       long nr_unused;
+       long dummy[5];          /* padding for sysctl ABI compatibility */
+};
+
+
+#define NR_FILE  8192  /* this can well be larger on a larger system */
+
+
+/*
+ * These are the fs-independent mount-flags: up to 32 flags are supported
+ */
+#define MS_RDONLY       1      /* Mount read-only */
+#define MS_NOSUID       2      /* Ignore suid and sgid bits */
+#define MS_NODEV        4      /* Disallow access to device special files */
+#define MS_NOEXEC       8      /* Disallow program execution */
+#define MS_SYNCHRONOUS 16      /* Writes are synced at once */
+#define MS_REMOUNT     32      /* Alter flags of a mounted FS */
+#define MS_MANDLOCK    64      /* Allow mandatory locks on an FS */
+#define MS_DIRSYNC     128     /* Directory modifications are synchronous */
+#define MS_NOATIME     1024    /* Do not update access times. */
+#define MS_NODIRATIME  2048    /* Do not update directory access times */
+#define MS_BIND                4096
+#define MS_MOVE                8192
+#define MS_REC         16384
+#define MS_VERBOSE     32768   /* War is peace. Verbosity is silence.
+                                  MS_VERBOSE is deprecated. */
+#define MS_SILENT      32768
+#define MS_POSIXACL    (1<<16) /* VFS does not apply the umask */
+#define MS_UNBINDABLE  (1<<17) /* change to unbindable */
+#define MS_PRIVATE     (1<<18) /* change to private */
+#define MS_SLAVE       (1<<19) /* change to slave */
+#define MS_SHARED      (1<<20) /* change to shared */
+#define MS_RELATIME    (1<<21) /* Update atime relative to mtime/ctime. */
+#define MS_KERNMOUNT   (1<<22) /* this is a kern_mount call */
+#define MS_I_VERSION   (1<<23) /* Update inode I_version field */
+#define MS_STRICTATIME (1<<24) /* Always perform atime updates */
+#define MS_LAZYTIME    (1<<25) /* Update the on-disk [acm]times lazily */
+
+/* These sb flags are internal to the kernel */
+#define MS_SUBMOUNT     (1<<26)
+#define MS_NOREMOTELOCK        (1<<27)
+#define MS_NOSEC       (1<<28)
+#define MS_BORN                (1<<29)
+#define MS_ACTIVE      (1<<30)
+#define MS_NOUSER      (1<<31)
+
+/*
+ * Superblock flags that can be altered by MS_REMOUNT
+ */
+#define MS_RMT_MASK    (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\
+                        MS_LAZYTIME)
+
+/*
+ * Old magic mount flag and mask
+ */
+#define MS_MGC_VAL 0xC0ED0000
+#define MS_MGC_MSK 0xffff0000
+
+/*
+ * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR.
+ */
+struct fsxattr {
+       __u32           fsx_xflags;     /* xflags field value (get/set) */
+       __u32           fsx_extsize;    /* extsize field value (get/set)*/
+       __u32           fsx_nextents;   /* nextents field value (get)   */
+       __u32           fsx_projid;     /* project identifier (get/set) */
+       __u32           fsx_cowextsize; /* CoW extsize field value (get/set)*/
+       unsigned char   fsx_pad[8];
+};
+
+/*
+ * Flags for the fsx_xflags field
+ */
+#define FS_XFLAG_REALTIME      0x00000001      /* data in realtime volume */
+#define FS_XFLAG_PREALLOC      0x00000002      /* preallocated file extents */
+#define FS_XFLAG_IMMUTABLE     0x00000008      /* file cannot be modified */
+#define FS_XFLAG_APPEND                0x00000010      /* all writes append */
+#define FS_XFLAG_SYNC          0x00000020      /* all writes synchronous */
+#define FS_XFLAG_NOATIME       0x00000040      /* do not update access time */
+#define FS_XFLAG_NODUMP                0x00000080      /* do not include in backups */
+#define FS_XFLAG_RTINHERIT     0x00000100      /* create with rt bit set */
+#define FS_XFLAG_PROJINHERIT   0x00000200      /* create with parents projid */
+#define FS_XFLAG_NOSYMLINKS    0x00000400      /* disallow symlink creation */
+#define FS_XFLAG_EXTSIZE       0x00000800      /* extent size allocator hint */
+#define FS_XFLAG_EXTSZINHERIT  0x00001000      /* inherit inode extent size */
+#define FS_XFLAG_NODEFRAG      0x00002000      /* do not defragment */
+#define FS_XFLAG_FILESTREAM    0x00004000      /* use filestream allocator */
+#define FS_XFLAG_DAX           0x00008000      /* use DAX for IO */
+#define FS_XFLAG_COWEXTSIZE    0x00010000      /* CoW extent size allocator hint */
+#define FS_XFLAG_HASATTR       0x80000000      /* no DIFLAG for this   */
+
+/* the read-only stuff doesn't really belong here, but any other place is
+   probably as bad and I don't want to create yet another include file. */
+
+#define BLKROSET   _IO(0x12,93)        /* set device read-only (0 = read-write) */
+#define BLKROGET   _IO(0x12,94)        /* get read-only status (0 = read_write) */
+#define BLKRRPART  _IO(0x12,95)        /* re-read partition table */
+#define BLKGETSIZE _IO(0x12,96)        /* return device size /512 (long *arg) */
+#define BLKFLSBUF  _IO(0x12,97)        /* flush buffer cache */
+#define BLKRASET   _IO(0x12,98)        /* set read ahead for block device */
+#define BLKRAGET   _IO(0x12,99)        /* get current read ahead setting */
+#define BLKFRASET  _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */
+#define BLKFRAGET  _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */
+#define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */
+#define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */
+#define BLKSSZGET  _IO(0x12,104)/* get block device sector size */
+#if 0
+#define BLKPG      _IO(0x12,105)/* See blkpg.h */
+
+/* Some people are morons.  Do not use sizeof! */
+
+#define BLKELVGET  _IOR(0x12,106,size_t)/* elevator get */
+#define BLKELVSET  _IOW(0x12,107,size_t)/* elevator set */
+/* This was here just to show that the number is taken -
+   probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */
+#endif
+/* A jump here: 108-111 have been used for various private purposes. */
+#define BLKBSZGET  _IOR(0x12,112,size_t)
+#define BLKBSZSET  _IOW(0x12,113,size_t)
+#define BLKGETSIZE64 _IOR(0x12,114,size_t)     /* return device size in bytes (u64 *arg) */
+#define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup)
+#define BLKTRACESTART _IO(0x12,116)
+#define BLKTRACESTOP _IO(0x12,117)
+#define BLKTRACETEARDOWN _IO(0x12,118)
+#define BLKDISCARD _IO(0x12,119)
+#define BLKIOMIN _IO(0x12,120)
+#define BLKIOOPT _IO(0x12,121)
+#define BLKALIGNOFF _IO(0x12,122)
+#define BLKPBSZGET _IO(0x12,123)
+#define BLKDISCARDZEROES _IO(0x12,124)
+#define BLKSECDISCARD _IO(0x12,125)
+#define BLKROTATIONAL _IO(0x12,126)
+#define BLKZEROOUT _IO(0x12,127)
+/*
+ * A jump here: 130-131 are reserved for zoned block devices
+ * (see uapi/linux/blkzoned.h)
+ */
+
+#define BMAP_IOCTL 1           /* obsolete - kept for compatibility */
+#define FIBMAP    _IO(0x00,1)  /* bmap access */
+#define FIGETBSZ   _IO(0x00,2) /* get the block size used for bmap */
+#define FIFREEZE       _IOWR('X', 119, int)    /* Freeze */
+#define FITHAW         _IOWR('X', 120, int)    /* Thaw */
+#define FITRIM         _IOWR('X', 121, struct fstrim_range)    /* Trim */
+#define FICLONE                _IOW(0x94, 9, int)
+#define FICLONERANGE   _IOW(0x94, 13, struct file_clone_range)
+#define FIDEDUPERANGE  _IOWR(0x94, 54, struct file_dedupe_range)
+
+#define FSLABEL_MAX 256        /* Max chars for the interface; each fs may differ */
+
+#define        FS_IOC_GETFLAGS                 _IOR('f', 1, long)
+#define        FS_IOC_SETFLAGS                 _IOW('f', 2, long)
+#define        FS_IOC_GETVERSION               _IOR('v', 1, long)
+#define        FS_IOC_SETVERSION               _IOW('v', 2, long)
+#define FS_IOC_FIEMAP                  _IOWR('f', 11, struct fiemap)
+#define FS_IOC32_GETFLAGS              _IOR('f', 1, int)
+#define FS_IOC32_SETFLAGS              _IOW('f', 2, int)
+#define FS_IOC32_GETVERSION            _IOR('v', 1, int)
+#define FS_IOC32_SETVERSION            _IOW('v', 2, int)
+#define FS_IOC_FSGETXATTR              _IOR('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR              _IOW('X', 32, struct fsxattr)
+#define FS_IOC_GETFSLABEL              _IOR(0x94, 49, char[FSLABEL_MAX])
+#define FS_IOC_SETFSLABEL              _IOW(0x94, 50, char[FSLABEL_MAX])
+
+/*
+ * File system encryption support
+ */
+/* Policy provided via an ioctl on the topmost directory */
+#define FS_KEY_DESCRIPTOR_SIZE 8
+
+#define FS_POLICY_FLAGS_PAD_4          0x00
+#define FS_POLICY_FLAGS_PAD_8          0x01
+#define FS_POLICY_FLAGS_PAD_16         0x02
+#define FS_POLICY_FLAGS_PAD_32         0x03
+#define FS_POLICY_FLAGS_PAD_MASK       0x03
+#define FS_POLICY_FLAGS_VALID          0x03
+
+/* Encryption algorithms */
+#define FS_ENCRYPTION_MODE_INVALID             0
+#define FS_ENCRYPTION_MODE_AES_256_XTS         1
+#define FS_ENCRYPTION_MODE_AES_256_GCM         2
+#define FS_ENCRYPTION_MODE_AES_256_CBC         3
+#define FS_ENCRYPTION_MODE_AES_256_CTS         4
+#define FS_ENCRYPTION_MODE_AES_128_CBC         5
+#define FS_ENCRYPTION_MODE_AES_128_CTS         6
+#define FS_ENCRYPTION_MODE_SPECK128_256_XTS    7 /* Removed, do not use. */
+#define FS_ENCRYPTION_MODE_SPECK128_256_CTS    8 /* Removed, do not use. */
+
+struct fscrypt_policy {
+       __u8 version;
+       __u8 contents_encryption_mode;
+       __u8 filenames_encryption_mode;
+       __u8 flags;
+       __u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+};
+
+#define FS_IOC_SET_ENCRYPTION_POLICY   _IOR('f', 19, struct fscrypt_policy)
+#define FS_IOC_GET_ENCRYPTION_PWSALT   _IOW('f', 20, __u8[16])
+#define FS_IOC_GET_ENCRYPTION_POLICY   _IOW('f', 21, struct fscrypt_policy)
+
+/* Parameters for passing an encryption key into the kernel keyring */
+#define FS_KEY_DESC_PREFIX             "fscrypt:"
+#define FS_KEY_DESC_PREFIX_SIZE                8
+
+/* Structure that userspace passes to the kernel keyring */
+#define FS_MAX_KEY_SIZE                        64
+
+struct fscrypt_key {
+       __u32 mode;
+       __u8 raw[FS_MAX_KEY_SIZE];
+       __u32 size;
+};
+
+/*
+ * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
+ *
+ * Note: for historical reasons, these flags were originally used and
+ * defined for use by ext2/ext3, and then other file systems started
+ * using these flags so they wouldn't need to write their own version
+ * of chattr/lsattr (which was shipped as part of e2fsprogs).  You
+ * should think twice before trying to use these flags in new
+ * contexts, or trying to assign these flags, since they are used both
+ * as the UAPI and the on-disk encoding for ext2/3/4.  Also, we are
+ * almost out of 32-bit flags.  :-)
+ *
+ * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from
+ * XFS to the generic FS level interface.  This uses a structure that
+ * has padding and hence has more room to grow, so it may be more
+ * appropriate for many new use cases.
+ *
+ * Please do not change these flags or interfaces before checking with
+ * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org.
+ */
+#define        FS_SECRM_FL                     0x00000001 /* Secure deletion */
+#define        FS_UNRM_FL                      0x00000002 /* Undelete */
+#define        FS_COMPR_FL                     0x00000004 /* Compress file */
+#define FS_SYNC_FL                     0x00000008 /* Synchronous updates */
+#define FS_IMMUTABLE_FL                        0x00000010 /* Immutable file */
+#define FS_APPEND_FL                   0x00000020 /* writes to file may only append */
+#define FS_NODUMP_FL                   0x00000040 /* do not dump file */
+#define FS_NOATIME_FL                  0x00000080 /* do not update atime */
+/* Reserved for compression usage... */
+#define FS_DIRTY_FL                    0x00000100
+#define FS_COMPRBLK_FL                 0x00000200 /* One or more compressed clusters */
+#define FS_NOCOMP_FL                   0x00000400 /* Don't compress */
+/* End compression flags --- maybe not all used */
+#define FS_ENCRYPT_FL                  0x00000800 /* Encrypted file */
+#define FS_BTREE_FL                    0x00001000 /* btree format dir */
+#define FS_INDEX_FL                    0x00001000 /* hash-indexed directory */
+#define FS_IMAGIC_FL                   0x00002000 /* AFS directory */
+#define FS_JOURNAL_DATA_FL             0x00004000 /* Reserved for ext3 */
+#define FS_NOTAIL_FL                   0x00008000 /* file tail should not be merged */
+#define FS_DIRSYNC_FL                  0x00010000 /* dirsync behaviour (directories only) */
+#define FS_TOPDIR_FL                   0x00020000 /* Top of directory hierarchies*/
+#define FS_HUGE_FILE_FL                        0x00040000 /* Reserved for ext4 */
+#define FS_EXTENT_FL                   0x00080000 /* Extents */
+#define FS_EA_INODE_FL                 0x00200000 /* Inode used for large EA */
+#define FS_EOFBLOCKS_FL                        0x00400000 /* Reserved for ext4 */
+#define FS_NOCOW_FL                    0x00800000 /* Do not cow file */
+#define FS_INLINE_DATA_FL              0x10000000 /* Reserved for ext4 */
+#define FS_PROJINHERIT_FL              0x20000000 /* Create with parents projid */
+#define FS_RESERVED_FL                 0x80000000 /* reserved for ext2 lib */
+
+#define FS_FL_USER_VISIBLE             0x0003DFFF /* User visible flags */
+#define FS_FL_USER_MODIFIABLE          0x000380FF /* User modifiable flags */
+
+
+#define SYNC_FILE_RANGE_WAIT_BEFORE    1
+#define SYNC_FILE_RANGE_WRITE          2
+#define SYNC_FILE_RANGE_WAIT_AFTER     4
+
+/*
+ * Flags for preadv2/pwritev2:
+ */
+
+typedef int __bitwise __kernel_rwf_t;
+
+/* high priority request, poll if possible */
+#define RWF_HIPRI      ((__force __kernel_rwf_t)0x00000001)
+
+/* per-IO O_DSYNC */
+#define RWF_DSYNC      ((__force __kernel_rwf_t)0x00000002)
+
+/* per-IO O_SYNC */
+#define RWF_SYNC       ((__force __kernel_rwf_t)0x00000004)
+
+/* per-IO, return -EAGAIN if operation would block */
+#define RWF_NOWAIT     ((__force __kernel_rwf_t)0x00000008)
+
+/* per-IO O_APPEND */
+#define RWF_APPEND     ((__force __kernel_rwf_t)0x00000010)
+
+/* mask of flags supported by the kernel */
+#define RWF_SUPPORTED  (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
+                        RWF_APPEND)
+
+#endif /* _UAPI_LINUX_FS_H */
index 58faab897201f7c62a77ea336d4f16707b433854..1debfa42cba1a965fcca8532d8b63c0bf4cf7949 100644 (file)
@@ -287,6 +287,7 @@ enum {
        IFLA_BR_MCAST_STATS_ENABLED,
        IFLA_BR_MCAST_IGMP_VERSION,
        IFLA_BR_MCAST_MLD_VERSION,
+       IFLA_BR_VLAN_STATS_PER_PORT,
        __IFLA_BR_MAX,
 };
 
index 2875ce85b3226c824a9fdf48fc76be4a14645694..2b7a652c9fa4635b3b83d97f644a9e3ecb0866a0 100644 (file)
@@ -420,13 +420,19 @@ struct kvm_run {
 struct kvm_coalesced_mmio_zone {
        __u64 addr;
        __u32 size;
-       __u32 pad;
+       union {
+               __u32 pad;
+               __u32 pio;
+       };
 };
 
 struct kvm_coalesced_mmio {
        __u64 phys_addr;
        __u32 len;
-       __u32 pad;
+       union {
+               __u32 pad;
+               __u32 pio;
+       };
        __u8  data[8];
 };
 
@@ -751,6 +757,15 @@ struct kvm_ppc_resize_hpt {
 
 #define KVM_S390_SIE_PAGE_OFFSET 1
 
+/*
+ * On arm64, machine type can be used to request the physical
+ * address size for the VM. Bits[7-0] are reserved for the guest
+ * PA size shift (i.e, log2(PA_Size)). For backward compatibility,
+ * value 0 implies the default IPA size, 40bits.
+ */
+#define KVM_VM_TYPE_ARM_IPA_SIZE_MASK  0xffULL
+#define KVM_VM_TYPE_ARM_IPA_SIZE(x)            \
+       ((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
 /*
  * ioctls for /dev/kvm fds:
  */
@@ -958,6 +973,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_HYPERV_SEND_IPI 161
 #define KVM_CAP_COALESCED_PIO 162
 #define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163
+#define KVM_CAP_EXCEPTION_PAYLOAD 164
+#define KVM_CAP_ARM_VM_IPA_SIZE 165
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
index bfd5938fede6c1ba3b71d096cd36127da2837042..d0f515d53299ea5784ffdb61dd1b829b04fd045c 100644 (file)
@@ -28,7 +28,9 @@
 #define MAP_HUGE_2MB   HUGETLB_FLAG_ENCODE_2MB
 #define MAP_HUGE_8MB   HUGETLB_FLAG_ENCODE_8MB
 #define MAP_HUGE_16MB  HUGETLB_FLAG_ENCODE_16MB
+#define MAP_HUGE_32MB  HUGETLB_FLAG_ENCODE_32MB
 #define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
+#define MAP_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB
 #define MAP_HUGE_1GB   HUGETLB_FLAG_ENCODE_1GB
 #define MAP_HUGE_2GB   HUGETLB_FLAG_ENCODE_2GB
 #define MAP_HUGE_16GB  HUGETLB_FLAG_ENCODE_16GB
index 776bc92e91180725e75f0291b1635234d6b6875f..486ed1f0c0bc17f48dca895ebf9581aa7d69278d 100644 (file)
@@ -155,6 +155,7 @@ enum nlmsgerr_attrs {
 #define NETLINK_LIST_MEMBERSHIPS       9
 #define NETLINK_CAP_ACK                        10
 #define NETLINK_EXT_ACK                        11
+#define NETLINK_DUMP_STRICT_CHK                12
 
 struct nl_pktinfo {
        __u32   group;
index f35eb72739c09e3ad0bd22e279fa4a33119c15f6..9de8780ac8d97568932d3857de3dc2c8e5de2806 100644 (file)
@@ -646,10 +646,12 @@ struct perf_event_mmap_page {
  *
  *   PERF_RECORD_MISC_MMAP_DATA  - PERF_RECORD_MMAP* events
  *   PERF_RECORD_MISC_COMM_EXEC  - PERF_RECORD_COMM event
+ *   PERF_RECORD_MISC_FORK_EXEC  - PERF_RECORD_FORK event (perf internal)
  *   PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events
  */
 #define PERF_RECORD_MISC_MMAP_DATA             (1 << 13)
 #define PERF_RECORD_MISC_COMM_EXEC             (1 << 13)
+#define PERF_RECORD_MISC_FORK_EXEC             (1 << 13)
 #define PERF_RECORD_MISC_SWITCH_OUT            (1 << 13)
 /*
  * These PERF_RECORD_MISC_* flags below are safely reused
index ed0a120d4f084fa0cfc10b5257ea9bce0c3d3a24..404d4b9ffe7644553a1b59fba043b151d935a2e9 100644 (file)
@@ -752,7 +752,7 @@ struct snd_timer_info {
 #define SNDRV_TIMER_PSFLG_EARLY_EVENT  (1<<2)  /* write early event to the poll queue */
 
 struct snd_timer_params {
-       unsigned int flags;             /* flags - SNDRV_MIXER_PSFLG_* */
+       unsigned int flags;             /* flags - SNDRV_TIMER_PSFLG_* */
        unsigned int ticks;             /* requested resolution in ticks */
        unsigned int queue_size;        /* total size of queue (32-1024) */
        unsigned int reserved0;         /* reserved, was: failure locations */
index b607be7236d3e580fda376f04cbe62302f698e4f..d6e62e90e8d44df2a3b710b6cfd875b32e20daec 100644 (file)
@@ -2084,19 +2084,19 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
        prog->expected_attach_type = type;
 }
 
-#define BPF_PROG_SEC_IMPL(string, ptype, eatype, atype) \
-       { string, sizeof(string) - 1, ptype, eatype, atype }
+#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, atype) \
+       { string, sizeof(string) - 1, ptype, eatype, is_attachable, atype }
 
 /* Programs that can NOT be attached. */
-#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, -EINVAL)
+#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0)
 
 /* Programs that can be attached. */
 #define BPF_APROG_SEC(string, ptype, atype) \
-       BPF_PROG_SEC_IMPL(string, ptype, 0, atype)
+       BPF_PROG_SEC_IMPL(string, ptype, 0, 1, atype)
 
 /* Programs that must specify expected attach type at load time. */
 #define BPF_EAPROG_SEC(string, ptype, eatype) \
-       BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype)
+       BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, eatype)
 
 /* Programs that can be attached but attach type can't be identified by section
  * name. Kept for backward compatibility.
@@ -2108,6 +2108,7 @@ static const struct {
        size_t len;
        enum bpf_prog_type prog_type;
        enum bpf_attach_type expected_attach_type;
+       int is_attachable;
        enum bpf_attach_type attach_type;
 } section_names[] = {
        BPF_PROG_SEC("socket",                  BPF_PROG_TYPE_SOCKET_FILTER),
@@ -2198,7 +2199,7 @@ int libbpf_attach_type_by_name(const char *name,
        for (i = 0; i < ARRAY_SIZE(section_names); i++) {
                if (strncmp(name, section_names[i].sec, section_names[i].len))
                        continue;
-               if (section_names[i].attach_type == -EINVAL)
+               if (!section_names[i].is_attachable)
                        return -EINVAL;
                *attach_type = section_names[i].attach_type;
                return 0;
index cb7154eccbdc1e6a825060b137155ce4560df2a4..dbb9efbf718a065d4d6e9998fa4d147b1cc691fc 100644 (file)
@@ -116,6 +116,7 @@ static int get_value(struct parse_opt_ctx_t *p,
                case OPTION_INTEGER:
                case OPTION_UINTEGER:
                case OPTION_LONG:
+               case OPTION_ULONG:
                case OPTION_U64:
                default:
                        break;
@@ -166,6 +167,7 @@ static int get_value(struct parse_opt_ctx_t *p,
                case OPTION_INTEGER:
                case OPTION_UINTEGER:
                case OPTION_LONG:
+               case OPTION_ULONG:
                case OPTION_U64:
                default:
                        break;
@@ -295,6 +297,22 @@ static int get_value(struct parse_opt_ctx_t *p,
                        return opterror(opt, "expects a numerical value", flags);
                return 0;
 
+       case OPTION_ULONG:
+               if (unset) {
+                       *(unsigned long *)opt->value = 0;
+                       return 0;
+               }
+               if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+                       *(unsigned long *)opt->value = opt->defval;
+                       return 0;
+               }
+               if (get_arg(p, opt, flags, &arg))
+                       return -1;
+               *(unsigned long *)opt->value = strtoul(arg, (char **)&s, 10);
+               if (*s)
+                       return opterror(opt, "expects a numerical value", flags);
+               return 0;
+
        case OPTION_U64:
                if (unset) {
                        *(u64 *)opt->value = 0;
@@ -703,6 +721,7 @@ static void print_option_help(const struct option *opts, int full)
        case OPTION_ARGUMENT:
                break;
        case OPTION_LONG:
+       case OPTION_ULONG:
        case OPTION_U64:
        case OPTION_INTEGER:
        case OPTION_UINTEGER:
index 92fdbe1519f6d8f2aebfd8cf05a119bea7be344a..6ca2a8bfe716b1c658f693b29c64b2dd0b79642e 100644 (file)
@@ -25,6 +25,7 @@ enum parse_opt_type {
        OPTION_STRING,
        OPTION_INTEGER,
        OPTION_LONG,
+       OPTION_ULONG,
        OPTION_CALLBACK,
        OPTION_U64,
        OPTION_UINTEGER,
@@ -133,6 +134,7 @@ struct option {
 #define OPT_INTEGER(s, l, v, h)     { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) }
 #define OPT_UINTEGER(s, l, v, h)    { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) }
 #define OPT_LONG(s, l, v, h)        { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) }
+#define OPT_ULONG(s, l, v, h)        { .type = OPTION_ULONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned long *), .help = (h) }
 #define OPT_U64(s, l, v, h)         { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) }
 #define OPT_STRING(s, l, v, a, h)   { .type = OPTION_STRING,  .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h) }
 #define OPT_STRING_OPTARG(s, l, v, a, h, d) \
index 2928939b98ec208fee9069cfc3ddf4aea91f01f3..0414a0d522621d4ca973240979e89c07d6cd4f8a 100644 (file)
@@ -836,7 +836,7 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn,
        struct symbol *pfunc = insn->func->pfunc;
        unsigned int prev_offset = 0;
 
-       list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
+       list_for_each_entry_from(rela, &table->rela_sec->rela_list, list) {
                if (rela == next_table)
                        break;
 
@@ -926,6 +926,7 @@ static struct rela *find_switch_table(struct objtool_file *file,
 {
        struct rela *text_rela, *rodata_rela;
        struct instruction *orig_insn = insn;
+       struct section *rodata_sec;
        unsigned long table_offset;
 
        /*
@@ -953,10 +954,13 @@ static struct rela *find_switch_table(struct objtool_file *file,
                /* look for a relocation which references .rodata */
                text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
                                                    insn->len);
-               if (!text_rela || text_rela->sym != file->rodata->sym)
+               if (!text_rela || text_rela->sym->type != STT_SECTION ||
+                   !text_rela->sym->sec->rodata)
                        continue;
 
                table_offset = text_rela->addend;
+               rodata_sec = text_rela->sym->sec;
+
                if (text_rela->type == R_X86_64_PC32)
                        table_offset += 4;
 
@@ -964,10 +968,10 @@ static struct rela *find_switch_table(struct objtool_file *file,
                 * Make sure the .rodata address isn't associated with a
                 * symbol.  gcc jump tables are anonymous data.
                 */
-               if (find_symbol_containing(file->rodata, table_offset))
+               if (find_symbol_containing(rodata_sec, table_offset))
                        continue;
 
-               rodata_rela = find_rela_by_dest(file->rodata, table_offset);
+               rodata_rela = find_rela_by_dest(rodata_sec, table_offset);
                if (rodata_rela) {
                        /*
                         * Use of RIP-relative switch jumps is quite rare, and
@@ -1052,7 +1056,7 @@ static int add_switch_table_alts(struct objtool_file *file)
        struct symbol *func;
        int ret;
 
-       if (!file->rodata || !file->rodata->rela)
+       if (!file->rodata)
                return 0;
 
        for_each_sec(file, sec) {
@@ -1198,10 +1202,33 @@ static int read_retpoline_hints(struct objtool_file *file)
        return 0;
 }
 
+static void mark_rodata(struct objtool_file *file)
+{
+       struct section *sec;
+       bool found = false;
+
+       /*
+        * This searches for the .rodata section or multiple .rodata.func_name
+        * sections if -fdata-sections is being used. The .str1.1 and .str1.8
+        * rodata sections are ignored as they don't contain jump tables.
+        */
+       for_each_sec(file, sec) {
+               if (!strncmp(sec->name, ".rodata", 7) &&
+                   !strstr(sec->name, ".str1.")) {
+                       sec->rodata = true;
+                       found = true;
+               }
+       }
+
+       file->rodata = found;
+}
+
 static int decode_sections(struct objtool_file *file)
 {
        int ret;
 
+       mark_rodata(file);
+
        ret = decode_instructions(file);
        if (ret)
                return ret;
@@ -2171,7 +2198,6 @@ int check(const char *_objname, bool orc)
        INIT_LIST_HEAD(&file.insn_list);
        hash_init(file.insn_hash);
        file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
-       file.rodata = find_section_by_name(file.elf, ".rodata");
        file.c_file = find_section_by_name(file.elf, ".comment");
        file.ignore_unreachables = no_unreachable;
        file.hints = false;
index 95700a2bcb7c1ee429c6b2e0270428a5bfb8b54a..e6e8a655b5563e84bcd5d67eee13a446db81cc31 100644 (file)
@@ -60,8 +60,8 @@ struct objtool_file {
        struct elf *elf;
        struct list_head insn_list;
        DECLARE_HASHTABLE(insn_hash, 16);
-       struct section *rodata, *whitelist;
-       bool ignore_unreachables, c_file, hints;
+       struct section *whitelist;
+       bool ignore_unreachables, c_file, hints, rodata;
 };
 
 int check(const char *objname, bool orc);
index 7ec85d567598c5047fbe00b9660c9e7fc76870cf..6dbb9fae0f9d4f4c54e78431f84ea3e034e07f0a 100644 (file)
@@ -301,7 +301,7 @@ static int read_symbols(struct elf *elf)
                        if (sym->type != STT_FUNC)
                                continue;
                        sym->pfunc = sym->cfunc = sym;
-                       coldstr = strstr(sym->name, ".cold.");
+                       coldstr = strstr(sym->name, ".cold");
                        if (!coldstr)
                                continue;
 
@@ -379,6 +379,7 @@ static int read_relas(struct elf *elf)
                        rela->offset = rela->rela.r_offset;
                        symndx = GELF_R_SYM(rela->rela.r_info);
                        rela->sym = find_symbol_by_index(elf, symndx);
+                       rela->rela_sec = sec;
                        if (!rela->sym) {
                                WARN("can't find rela entry symbol %d for %s",
                                     symndx, sec->name);
index de5cd2ddded987bf524be46e446bd1e814422761..bc97ed86b9cd8ebd3fc8e9e1512d8d06b3e96d14 100644 (file)
@@ -48,7 +48,7 @@ struct section {
        char *name;
        int idx;
        unsigned int len;
-       bool changed, text;
+       bool changed, text, rodata;
 };
 
 struct symbol {
@@ -68,6 +68,7 @@ struct rela {
        struct list_head list;
        struct hlist_node hash;
        GElf_Rela rela;
+       struct section *rela_sec;
        struct symbol *sym;
        unsigned int type;
        unsigned long offset;
diff --git a/tools/perf/Documentation/build-xed.txt b/tools/perf/Documentation/build-xed.txt
new file mode 100644 (file)
index 0000000..6222c1e
--- /dev/null
@@ -0,0 +1,19 @@
+
+For --xed the xed tool is needed. Here is how to install it:
+
+  $ git clone https://github.com/intelxed/mbuild.git mbuild
+  $ git clone https://github.com/intelxed/xed
+  $ cd xed
+  $ ./mfile.py --share
+  $ ./mfile.py examples
+  $ sudo ./mfile.py --prefix=/usr/local install
+  $ sudo ldconfig
+  $ sudo cp obj/examples/xed /usr/local/bin
+
+Basic xed testing:
+
+  $ xed | head -3
+  ERROR: required argument(s) were missing
+  Copyright (C) 2017, Intel Corporation. All rights reserved.
+  XED version: [v10.0-328-g7d62c8c49b7b]
+  $
index 76971d2e416450c24fbb24bb51db584da7661180..115eaacc455fdb020ca34f3c16fb1bcad092b9d1 100644 (file)
@@ -106,7 +106,7 @@ in transaction, respectively.
 While it is possible to create scripts to analyze the data, an alternative
 approach is available to export the data to a sqlite or postgresql database.
 Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
-and to script call-graph-from-sql.py for an example of using the database.
+and to script exported-sql-viewer.py for an example of using the database.
 
 There is also script intel-pt-events.py which provides an example of how to
 unpack the raw data for power events and PTWRITE.
index a3abe04c779d03615a9ba3815337bf1345b45082..c2182cbabde3a07196b26e44a3a8c6a0165d3cb4 100644 (file)
                l       synthesize last branch entries (use with i or x)
                s       skip initial number of events
 
-       The default is all events i.e. the same as --itrace=ibxwpe
+       The default is all events i.e. the same as --itrace=ibxwpe,
+       except for perf script where it is --itrace=ce
 
-       In addition, the period (default 100000) for instructions events
-       can be specified in units of:
+       In addition, the period (default 100000, except for perf script where it is 1)
+       for instructions events can be specified in units of:
 
                i       instructions
                t       ticks
index afdafe2110a17adea848871e033a85f85972b4a1..a2b37ce48094de7e66bc3479c9aced16ef128002 100644 (file)
@@ -383,6 +383,24 @@ include::itrace.txt[]
        will be printed. Each entry has function name and file/line. Enabled by
        default, disable with --no-inline.
 
+--insn-trace::
+       Show instruction stream for intel_pt traces. Combine with --xed to
+       show disassembly.
+
+--xed::
+       Run xed disassembler on output. Requires installing the xed disassembler.
+
+--call-trace::
+       Show call stream for intel_pt traces. The CPUs are interleaved, but
+       can be filtered with -C.
+
+--call-ret-trace::
+       Show call and return stream for intel_pt traces.
+
+--graph-function::
+       For itrace, only show the specified functions and their
+       callees. Multiple functions can be separated by comma.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
index 114fda12aa490089c16802282ed598f51303bd10..808b664343c9f5faa9a07d5327945926d844c7cb 100644 (file)
@@ -242,6 +242,16 @@ Default is to monitor all CPUS.
 --hierarchy::
        Enable hierarchy output.
 
+--overwrite::
+       Enable this to use just the most recent records, which helps in high core count
+       machines such as Knights Landing/Mill, but right now is disabled by default as
+       the pausing used in this technique is leading to loss of metadata events such
+       as PERF_RECORD_MMAP which makes 'perf top' unable to resolve samples, leading
+       to lots of unknown samples appearing on the UI. Enable this if you are using such
+       machines and profiling a workload that doesn't create short lived threads and/or
+       doesn't use many executable mmap operations. Work is being planned to solve
+       this situation; until then, this will remain disabled by default.
+
 --force::
        Don't do ownership validation.
 
index 115db9e06ecd8a8d3f53059b8a8aa28c2337584a..e113450503d2f6fdf63d356adff4b9c580bd640d 100644 (file)
@@ -171,6 +171,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 --kernel-syscall-graph::
         Show the kernel callchains on the syscall exit path.
 
+--max-events=N::
+       Stop after processing N events. Note that strace-like events are considered
+       only at exit time or when a syscall is interrupted, i.e. in those cases this
+       option is equivalent to the number of lines printed.
+
 --max-stack::
         Set the stack depth limit when parsing the callchain, anything
         beyond the specified depth will be ignored. Note that at this point
@@ -238,6 +243,68 @@ Trace syscalls, major and minor pagefaults:
   As you can see, there was major pagefault in python process, from
   CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so.
 
+Trace the first 4 open, openat or open_by_handle_at syscalls (in the future more syscalls may match here):
+
+  $ perf trace -e open* --max-events 4
+  [root@jouet perf]# trace -e open* --max-events 4
+  2272.992 ( 0.037 ms): gnome-shell/1370 openat(dfd: CWD, filename: /proc/self/stat) = 31
+  2277.481 ( 0.139 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
+  3026.398 ( 0.076 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
+  4294.665 ( 0.015 ms): sed/15879 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) = 3
+  $
+
+Trace the first minor page fault when running a workload:
+
+  # perf trace -F min --max-stack=7 --max-events 1 sleep 1
+     0.000 ( 0.000 ms): sleep/18006 minfault [__clear_user+0x1a] => 0x5626efa56080 (?k)
+                                       __clear_user ([kernel.kallsyms])
+                                       load_elf_binary ([kernel.kallsyms])
+                                       search_binary_handler ([kernel.kallsyms])
+                                       __do_execve_file.isra.33 ([kernel.kallsyms])
+                                       __x64_sys_execve ([kernel.kallsyms])
+                                       do_syscall_64 ([kernel.kallsyms])
+                                       entry_SYSCALL_64 ([kernel.kallsyms])
+  #
+
+Trace the next min page fault to take place on the first CPU:
+
+  # perf trace -F min --call-graph=dwarf --max-events 1 --cpu 0
+     0.000 ( 0.000 ms): Web Content/17136 minfault [js::gc::Chunk::fetchNextDecommittedArena+0x4b] => 0x7fbe6181b000 (?.)
+                                       js::gc::FreeSpan::initAsEmpty (inlined)
+                                       js::gc::Arena::setAsNotAllocated (inlined)
+                                       js::gc::Chunk::fetchNextDecommittedArena (/usr/lib64/firefox/libxul.so)
+                                       js::gc::Chunk::allocateArena (/usr/lib64/firefox/libxul.so)
+                                       js::gc::GCRuntime::allocateArena (/usr/lib64/firefox/libxul.so)
+                                       js::gc::ArenaLists::allocateFromArena (/usr/lib64/firefox/libxul.so)
+                                       js::gc::GCRuntime::tryNewTenuredThing<JSString, (js::AllowGC)1> (inlined)
+                                       js::AllocateString<JSString, (js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
+                                       js::Allocate<JSThinInlineString, (js::AllowGC)1> (inlined)
+                                       JSThinInlineString::new_<(js::AllowGC)1> (inlined)
+                                       AllocateInlineString<(js::AllowGC)1, unsigned char> (inlined)
+                                       js::ConcatStrings<(js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
+                                       [0x18b26e6bc2bd] (/tmp/perf-17136.map)
+  #
+
+Trace the next two sched:sched_switch events, four block:*_plug events, the
+next block:*_unplug and the next three net:*dev_queue events, this last one
+with a backtrace of at most 16 entries, system wide:
+
+  # perf trace -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/
+     0.000 :0/0 sched:sched_switch:swapper/2:0 [120] S ==> rcu_sched:10 [120]
+     0.015 rcu_sched/10 sched:sched_switch:rcu_sched:10 [120] R ==> swapper/2:0 [120]
+   254.198 irq/50-iwlwifi/680 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=66
+                                       __dev_queue_xmit ([kernel.kallsyms])
+   273.977 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=78
+                                       __dev_queue_xmit ([kernel.kallsyms])
+   274.007 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051ff00 len=78
+                                       __dev_queue_xmit ([kernel.kallsyms])
+  2930.140 kworker/u16:58/2722 block:block_plug:[kworker/u16:58]
+  2930.162 kworker/u16:58/2722 block:block_unplug:[kworker/u16:58] 1
+  4466.094 jbd2/dm-2-8/748 block:block_plug:[jbd2/dm-2-8]
+  8050.123 kworker/u16:30/2694 block:block_plug:[kworker/u16:30]
+  8050.271 kworker/u16:30/2694 block:block_plug:[kworker/u16:30]
+  #
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script[1]
index 2f3bf025e3050f94252c5ebd0a2ebdd18a5ce819..3ccb4f0bf0883cd80a8b33f612c649e84aaec126 100644 (file)
@@ -1,4 +1,5 @@
 include ../scripts/Makefile.include
+include ../scripts/Makefile.arch
 
 # The default target of this Makefile is...
 all:
@@ -385,6 +386,8 @@ export INSTALL SHELL_PATH
 SHELL = $(SHELL_PATH)
 
 linux_uapi_dir := $(srctree)/tools/include/uapi/linux
+asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
+arch_asm_uapi_dir := $(srctree)/tools/arch/$(ARCH)/include/uapi/asm/
 
 beauty_outdir := $(OUTPUT)trace/beauty/generated
 beauty_ioctl_outdir := $(beauty_outdir)/ioctl
@@ -460,6 +463,18 @@ madvise_behavior_tbl := $(srctree)/tools/perf/trace/beauty/madvise_behavior.sh
 $(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_tbl)
        $(Q)$(SHELL) '$(madvise_behavior_tbl)' $(madvise_hdr_dir) > $@
 
+mmap_flags_array := $(beauty_outdir)/mmap_flags_array.c
+mmap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mmap_flags.sh
+
+$(mmap_flags_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(arch_asm_uapi_dir)/mman.h $(mmap_flags_tbl)
+       $(Q)$(SHELL) '$(mmap_flags_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@
+
+mount_flags_array := $(beauty_outdir)/mount_flags_array.c
+mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh
+
+$(mount_flags_array): $(linux_uapi_dir)/fs.h $(mount_flags_tbl)
+       $(Q)$(SHELL) '$(mount_flags_tbl)' $(linux_uapi_dir) > $@
+
 prctl_option_array := $(beauty_outdir)/prctl_option_array.c
 prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/
 prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh
@@ -577,6 +592,8 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
        $(socket_ipproto_array) \
        $(vhost_virtio_ioctl_array) \
        $(madvise_behavior_array) \
+       $(mmap_flags_array) \
+       $(mount_flags_array) \
        $(perf_ioctl_array) \
        $(prctl_option_array) \
        $(arch_errno_name_array)
@@ -863,6 +880,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
                $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
                $(OUTPUT)pmu-events/pmu-events.c \
                $(OUTPUT)$(madvise_behavior_array) \
+               $(OUTPUT)$(mmap_flags_array) \
+               $(OUTPUT)$(mount_flags_array) \
                $(OUTPUT)$(drm_ioctl_array) \
                $(OUTPUT)$(pkey_alloc_access_rights_array) \
                $(OUTPUT)$(sndrv_ctl_ioctl_array) \
index 2dbb8cade048f76b4b43d88d5d9e27c09e025e0f..c88fd32563ebc013e6f261f072dd57e37444e236 100755 (executable)
@@ -23,7 +23,7 @@ create_table_from_c()
 {
        local sc nr last_sc
 
-       create_table_exe=`mktemp /tmp/create-table-XXXXXX`
+       create_table_exe=`mktemp ${TMPDIR:-/tmp}/create-table-XXXXXX`
 
        {
 
index 7fbca175099ec917ad69b8025c8249ee6c52a6a4..275dea7ff59a092b96561706306a36860955ceb9 100644 (file)
@@ -1,3 +1,5 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+
+PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/arch/sparc/annotate/instructions.c b/tools/perf/arch/sparc/annotate/instructions.c
new file mode 100644 (file)
index 0000000..2614c01
--- /dev/null
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+
+static int is_branch_cond(const char *cond)
+{
+       if (cond[0] == '\0')
+               return 1;
+
+       if (cond[0] == 'a' && cond[1] == '\0')
+               return 1;
+
+       if (cond[0] == 'c' &&
+           (cond[1] == 'c' || cond[1] == 's') &&
+           cond[2] == '\0')
+               return 1;
+
+       if (cond[0] == 'e' &&
+           (cond[1] == '\0' ||
+            (cond[1] == 'q' && cond[2] == '\0')))
+               return 1;
+
+       if (cond[0] == 'g' &&
+           (cond[1] == '\0' ||
+            (cond[1] == 't' && cond[2] == '\0') ||
+            (cond[1] == 'e' && cond[2] == '\0') ||
+            (cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0')))
+               return 1;
+
+       if (cond[0] == 'l' &&
+           (cond[1] == '\0' ||
+            (cond[1] == 't' && cond[2] == '\0') ||
+            (cond[1] == 'u' && cond[2] == '\0') ||
+            (cond[1] == 'e' && cond[2] == '\0') ||
+            (cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0')))
+               return 1;
+
+       if (cond[0] == 'n' &&
+           (cond[1] == '\0' ||
+            (cond[1] == 'e' && cond[2] == '\0') ||
+            (cond[1] == 'z' && cond[2] == '\0') ||
+            (cond[1] == 'e' && cond[2] == 'g' && cond[3] == '\0')))
+               return 1;
+
+       if (cond[0] == 'b' &&
+           cond[1] == 'p' &&
+           cond[2] == 'o' &&
+           cond[3] == 's' &&
+           cond[4] == '\0')
+               return 1;
+
+       if (cond[0] == 'v' &&
+           (cond[1] == 'c' || cond[1] == 's') &&
+           cond[2] == '\0')
+               return 1;
+
+       if (cond[0] == 'b' &&
+           cond[1] == 'z' &&
+           cond[2] == '\0')
+               return 1;
+
+       return 0;
+}
+
+static int is_branch_reg_cond(const char *cond)
+{
+       if ((cond[0] == 'n' || cond[0] == 'l') &&
+           cond[1] == 'z' &&
+           cond[2] == '\0')
+               return 1;
+
+       if (cond[0] == 'z' &&
+           cond[1] == '\0')
+               return 1;
+
+       if ((cond[0] == 'g' || cond[0] == 'l') &&
+           cond[1] == 'e' &&
+           cond[2] == 'z' &&
+           cond[3] == '\0')
+               return 1;
+
+       if (cond[0] == 'g' &&
+           cond[1] == 'z' &&
+           cond[2] == '\0')
+               return 1;
+
+       return 0;
+}
+
+static int is_branch_float_cond(const char *cond)
+{
+       if (cond[0] == '\0')
+               return 1;
+
+       if ((cond[0] == 'a' || cond[0] == 'e' ||
+            cond[0] == 'z' || cond[0] == 'g' ||
+            cond[0] == 'l' || cond[0] == 'n' ||
+            cond[0] == 'o' || cond[0] == 'u') &&
+           cond[1] == '\0')
+               return 1;
+
+       if (((cond[0] == 'g' && cond[1] == 'e') ||
+            (cond[0] == 'l' && (cond[1] == 'e' ||
+                                cond[1] == 'g')) ||
+            (cond[0] == 'n' && (cond[1] == 'e' ||
+                                cond[1] == 'z')) ||
+            (cond[0] == 'u' && (cond[1] == 'e' ||
+                                cond[1] == 'g' ||
+                                cond[1] == 'l'))) &&
+           cond[2] == '\0')
+               return 1;
+
+       if (cond[0] == 'u' &&
+           (cond[1] == 'g' || cond[1] == 'l') &&
+           cond[2] == 'e' &&
+           cond[3] == '\0')
+               return 1;
+
+       return 0;
+}
+
+static struct ins_ops *sparc__associate_instruction_ops(struct arch *arch, const char *name)
+{
+       struct ins_ops *ops = NULL;
+
+       if (!strcmp(name, "call") ||
+           !strcmp(name, "jmp") ||
+           !strcmp(name, "jmpl")) {
+               ops = &call_ops;
+       } else if (!strcmp(name, "ret") ||
+                  !strcmp(name, "retl") ||
+                  !strcmp(name, "return")) {
+               ops = &ret_ops;
+       } else if (!strcmp(name, "mov")) {
+               ops = &mov_ops;
+       } else {
+               if (name[0] == 'c' &&
+                   (name[1] == 'w' || name[1] == 'x'))
+                       name += 2;
+
+               if (name[0] == 'b') {
+                       const char *cond = name + 1;
+
+                       if (cond[0] == 'r') {
+                               if (is_branch_reg_cond(cond + 1))
+                                       ops = &jump_ops;
+                       } else if (is_branch_cond(cond)) {
+                               ops = &jump_ops;
+                       }
+               } else if (name[0] == 'f' && name[1] == 'b') {
+                       if (is_branch_float_cond(name + 2))
+                               ops = &jump_ops;
+               }
+       }
+
+       if (ops)
+               arch__associate_ins_ops(arch, name, ops);
+
+       return ops;
+}
+
+static int sparc__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
+{
+       if (!arch->initialized) {
+               arch->initialized = true;
+               arch->associate_instruction_ops = sparc__associate_instruction_ops;
+               arch->objdump.comment_char = '#';
+       }
+
+       return 0;
+}
index 0980dfe3396b188c3dd5692ad673bae137cb357c..10cf889c6d75d2db1d78014215e30948ae3b1cb4 100644 (file)
@@ -592,6 +592,9 @@ static void record__init_features(struct record *rec)
        if (!rec->opts.full_auxtrace)
                perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
 
+       if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
+               perf_header__clear_feat(&session->header, HEADER_CLOCKID);
+
        perf_header__clear_feat(&session->header, HEADER_STAT);
 }
 
@@ -897,6 +900,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 
        record__init_features(rec);
 
+       if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
+               session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
+
        if (forks) {
                err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
                                                    argv, data->is_pipe,
@@ -1337,6 +1343,19 @@ static const struct clockid_map clockids[] = {
        CLOCKID_END,
 };
 
+static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
+{
+       struct timespec res;
+
+       *res_ns = 0;
+       if (!clock_getres(clk_id, &res))
+               *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
+       else
+               pr_warning("WARNING: Failed to determine specified clock resolution.\n");
+
+       return 0;
+}
+
 static int parse_clockid(const struct option *opt, const char *str, int unset)
 {
        struct record_opts *opts = (struct record_opts *)opt->value;
@@ -1360,7 +1379,7 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
 
        /* if its a number, we're done */
        if (sscanf(str, "%d", &opts->clockid) == 1)
-               return 0;
+               return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
 
        /* allow a "CLOCK_" prefix to the name */
        if (!strncasecmp(str, "CLOCK_", 6))
@@ -1369,7 +1388,8 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
        for (cm = clockids; cm->name; cm++) {
                if (!strcasecmp(str, cm->name)) {
                        opts->clockid = cm->clockid;
-                       return 0;
+                       return get_clockid_res(opts->clockid,
+                                              &opts->clockid_res_ns);
                }
        }
 
index 4da5e32b9e035a97a797836f88a562c3a8206a2e..b5bc85bd0bbea48aec2f745f5b94c42a7d0fec4f 100644 (file)
@@ -44,6 +44,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <unistd.h>
+#include <subcmd/pager.h>
 
 #include "sane_ctype.h"
 
@@ -912,7 +913,7 @@ static int grab_bb(u8 *buffer, u64 start, u64 end,
 
 static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
                            struct perf_insn *x, u8 *inbuf, int len,
-                           int insn, FILE *fp)
+                           int insn, FILE *fp, int *total_cycles)
 {
        int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", ip,
                              dump_insn(x, ip, inbuf, len, NULL),
@@ -921,7 +922,8 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
                              en->flags.in_tx ? " INTX" : "",
                              en->flags.abort ? " ABORT" : "");
        if (en->flags.cycles) {
-               printed += fprintf(fp, " %d cycles", en->flags.cycles);
+               *total_cycles += en->flags.cycles;
+               printed += fprintf(fp, " %d cycles [%d]", en->flags.cycles, *total_cycles);
                if (insn)
                        printed += fprintf(fp, " %.2f IPC", (float)insn / en->flags.cycles);
        }
@@ -978,6 +980,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
        u8 buffer[MAXBB];
        unsigned off;
        struct symbol *lastsym = NULL;
+       int total_cycles = 0;
 
        if (!(br && br->nr))
                return 0;
@@ -998,7 +1001,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
                printed += ip__fprintf_sym(br->entries[nr - 1].from, thread,
                                           x.cpumode, x.cpu, &lastsym, attr, fp);
                printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
-                                           &x, buffer, len, 0, fp);
+                                           &x, buffer, len, 0, fp, &total_cycles);
        }
 
        /* Print all blocks */
@@ -1026,7 +1029,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 
                        printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
                        if (ip == end) {
-                               printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp);
+                               printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp,
+                                                           &total_cycles);
                                break;
                        } else {
                                printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip,
@@ -1104,6 +1108,35 @@ out:
        return printed;
 }
 
+static const char *resolve_branch_sym(struct perf_sample *sample,
+                                     struct perf_evsel *evsel,
+                                     struct thread *thread,
+                                     struct addr_location *al,
+                                     u64 *ip)
+{
+       struct addr_location addr_al;
+       struct perf_event_attr *attr = &evsel->attr;
+       const char *name = NULL;
+
+       if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
+               if (sample_addr_correlates_sym(attr)) {
+                       thread__resolve(thread, &addr_al, sample);
+                       if (addr_al.sym)
+                               name = addr_al.sym->name;
+                       else
+                               *ip = sample->addr;
+               } else {
+                       *ip = sample->addr;
+               }
+       } else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
+               if (al->sym)
+                       name = al->sym->name;
+               else
+                       *ip = sample->ip;
+       }
+       return name;
+}
+
 static int perf_sample__fprintf_callindent(struct perf_sample *sample,
                                           struct perf_evsel *evsel,
                                           struct thread *thread,
@@ -1111,7 +1144,6 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
 {
        struct perf_event_attr *attr = &evsel->attr;
        size_t depth = thread_stack__depth(thread);
-       struct addr_location addr_al;
        const char *name = NULL;
        static int spacing;
        int len = 0;
@@ -1125,22 +1157,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
        if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN)
                depth += 1;
 
-       if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
-               if (sample_addr_correlates_sym(attr)) {
-                       thread__resolve(thread, &addr_al, sample);
-                       if (addr_al.sym)
-                               name = addr_al.sym->name;
-                       else
-                               ip = sample->addr;
-               } else {
-                       ip = sample->addr;
-               }
-       } else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
-               if (al->sym)
-                       name = al->sym->name;
-               else
-                       ip = sample->ip;
-       }
+       name = resolve_branch_sym(sample, evsel, thread, al, &ip);
 
        if (PRINT_FIELD(DSO) && !(PRINT_FIELD(IP) || PRINT_FIELD(ADDR))) {
                dlen += fprintf(fp, "(");
@@ -1646,6 +1663,47 @@ static void perf_sample__fprint_metric(struct perf_script *script,
        }
 }
 
+static bool show_event(struct perf_sample *sample,
+                      struct perf_evsel *evsel,
+                      struct thread *thread,
+                      struct addr_location *al)
+{
+       int depth = thread_stack__depth(thread);
+
+       if (!symbol_conf.graph_function)
+               return true;
+
+       if (thread->filter) {
+               if (depth <= thread->filter_entry_depth) {
+                       thread->filter = false;
+                       return false;
+               }
+               return true;
+       } else {
+               const char *s = symbol_conf.graph_function;
+               u64 ip;
+               const char *name = resolve_branch_sym(sample, evsel, thread, al,
+                               &ip);
+               unsigned nlen;
+
+               if (!name)
+                       return false;
+               nlen = strlen(name);
+               while (*s) {
+                       unsigned len = strcspn(s, ",");
+                       if (nlen == len && !strncmp(name, s, len)) {
+                               thread->filter = true;
+                               thread->filter_entry_depth = depth;
+                               return true;
+                       }
+                       s += len;
+                       if (*s == ',')
+                               s++;
+               }
+               return false;
+       }
+}
+
 static void process_event(struct perf_script *script,
                          struct perf_sample *sample, struct perf_evsel *evsel,
                          struct addr_location *al,
@@ -1660,6 +1718,9 @@ static void process_event(struct perf_script *script,
        if (output[type].fields == 0)
                return;
 
+       if (!show_event(sample, evsel, thread, al))
+               return;
+
        ++es->samples;
 
        perf_sample__fprintf_start(sample, thread, evsel,
@@ -1737,6 +1798,9 @@ static void process_event(struct perf_script *script,
 
        if (PRINT_FIELD(METRIC))
                perf_sample__fprint_metric(script, thread, evsel, sample, fp);
+
+       if (verbose)
+               fflush(fp);
 }
 
 static struct scripting_ops    *scripting_ops;
@@ -3100,6 +3164,44 @@ static int perf_script__process_auxtrace_info(struct perf_session *session,
 #define perf_script__process_auxtrace_info 0
 #endif
 
+static int parse_insn_trace(const struct option *opt __maybe_unused,
+                           const char *str __maybe_unused,
+                           int unset __maybe_unused)
+{
+       parse_output_fields(NULL, "+insn,-event,-period", 0);
+       itrace_parse_synth_opts(opt, "i0ns", 0);
+       nanosecs = true;
+       return 0;
+}
+
+static int parse_xed(const struct option *opt __maybe_unused,
+                    const char *str __maybe_unused,
+                    int unset __maybe_unused)
+{
+       force_pager("xed -F insn: -A -64 | less");
+       return 0;
+}
+
+static int parse_call_trace(const struct option *opt __maybe_unused,
+                           const char *str __maybe_unused,
+                           int unset __maybe_unused)
+{
+       parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent", 0);
+       itrace_parse_synth_opts(opt, "cewp", 0);
+       nanosecs = true;
+       return 0;
+}
+
+static int parse_callret_trace(const struct option *opt __maybe_unused,
+                           const char *str __maybe_unused,
+                           int unset __maybe_unused)
+{
+       parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent,+flags", 0);
+       itrace_parse_synth_opts(opt, "crewp", 0);
+       nanosecs = true;
+       return 0;
+}
+
 int cmd_script(int argc, const char **argv)
 {
        bool show_full_info = false;
@@ -3109,7 +3211,10 @@ int cmd_script(int argc, const char **argv)
        char *rec_script_path = NULL;
        char *rep_script_path = NULL;
        struct perf_session *session;
-       struct itrace_synth_opts itrace_synth_opts = { .set = false, };
+       struct itrace_synth_opts itrace_synth_opts = {
+               .set = false,
+               .default_no_sample = true,
+       };
        char *script_path = NULL;
        const char **__argv;
        int i, j, err = 0;
@@ -3184,6 +3289,16 @@ int cmd_script(int argc, const char **argv)
                    "system-wide collection from all CPUs"),
        OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
                   "only consider these symbols"),
+       OPT_CALLBACK_OPTARG(0, "insn-trace", &itrace_synth_opts, NULL, NULL,
+                       "Decode instructions from itrace", parse_insn_trace),
+       OPT_CALLBACK_OPTARG(0, "xed", NULL, NULL, NULL,
+                       "Run xed disassembler on output", parse_xed),
+       OPT_CALLBACK_OPTARG(0, "call-trace", &itrace_synth_opts, NULL, NULL,
+                       "Decode calls from from itrace", parse_call_trace),
+       OPT_CALLBACK_OPTARG(0, "call-ret-trace", &itrace_synth_opts, NULL, NULL,
+                       "Decode calls and returns from itrace", parse_callret_trace),
+       OPT_STRING(0, "graph-function", &symbol_conf.graph_function, "symbol[,symbol...]",
+                       "Only print symbols and callees with --call-trace/--call-ret-trace"),
        OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]",
                   "Stop display of callgraph at these symbols"),
        OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
@@ -3417,8 +3532,10 @@ int cmd_script(int argc, const char **argv)
                exit(-1);
        }
 
-       if (!script_name)
+       if (!script_name) {
                setup_pager();
+               use_browser = 0;
+       }
 
        session = perf_session__new(&data, false, &script.tool);
        if (session == NULL)
@@ -3439,7 +3556,8 @@ int cmd_script(int argc, const char **argv)
        script.session = session;
        script__setup_sample_type(&script);
 
-       if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT)
+       if ((output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT) ||
+           symbol_conf.graph_function)
                itrace_synth_opts.thread_stack = true;
 
        session->itrace_synth_opts = &itrace_synth_opts;
index b86aba1c8028f0fae6043cefdc96e61adbbafe30..d1028d7755bbcb946a517c333a3757d91e75c8ed 100644 (file)
@@ -409,6 +409,28 @@ static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
        return leader;
 }
 
+static bool is_target_alive(struct target *_target,
+                           struct thread_map *threads)
+{
+       struct stat st;
+       int i;
+
+       if (!target__has_task(_target))
+               return true;
+
+       for (i = 0; i < threads->nr; i++) {
+               char path[PATH_MAX];
+
+               scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(),
+                         threads->map[i].pid);
+
+               if (!stat(path, &st))
+                       return true;
+       }
+
+       return false;
+}
+
 static int __run_perf_stat(int argc, const char **argv, int run_idx)
 {
        int interval = stat_config.interval;
@@ -579,6 +601,8 @@ try_again:
                enable_counters();
                while (!done) {
                        nanosleep(&ts, NULL);
+                       if (!is_target_alive(&target, evsel_list->threads))
+                               break;
                        if (timeout)
                                break;
                        if (interval) {
index d21d8751e74910db9639f0b3c450abf42eb223ab..b2838de13de02e29f6ad766eb8de11ec27bfa31b 100644 (file)
@@ -1134,11 +1134,6 @@ static int __cmd_top(struct perf_top *top)
         if (!target__none(&opts->target))
                 perf_evlist__enable(top->evlist);
 
-       /* Wait for a minimal set of events before starting the snapshot */
-       perf_evlist__poll(top->evlist, 100);
-
-       perf_top__mmap_read(top);
-
        ret = -1;
        if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
                                                            display_thread), top)) {
@@ -1156,6 +1151,11 @@ static int __cmd_top(struct perf_top *top)
                }
        }
 
+       /* Wait for a minimal set of events before starting the snapshot */
+       perf_evlist__poll(top->evlist, 100);
+
+       perf_top__mmap_read(top);
+
        while (!done) {
                u64 hits = top->samples;
 
@@ -1257,7 +1257,14 @@ int cmd_top(int argc, const char **argv)
                                .uses_mmap   = true,
                        },
                        .proc_map_timeout    = 500,
-                       .overwrite      = 1,
+                       /*
+                        * FIXME: This will lose PERF_RECORD_MMAP and other metadata
+                        * when we pause, fix that and reenable. Probably using a
+                        * separate evlist with a dummy event, i.e. a non-overwrite
+                        * ring buffer just for metadata events, while PERF_RECORD_SAMPLE
+                        * stays in overwrite mode. -acme
+                        * */
+                       .overwrite      = 0,
                },
                .max_stack           = sysctl__max_stack(),
                .annotation_opts     = annotation__default_options,
@@ -1372,6 +1379,8 @@ int cmd_top(int argc, const char **argv)
                    "Show raw trace event output (do not use print fmt or plugins)"),
        OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
                    "Show entries in a hierarchy"),
+       OPT_BOOLEAN(0, "overwrite", &top.record_opts.overwrite,
+                   "Use a backward ring buffer, default: no"),
        OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"),
        OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize,
                        "number of thread to run event synthesize"),
index 90289f31dd87c774ef882c24b1cdf55da9edcca3..dc8a6c4986ce2066b0e76cd58e0edb2f7f09852a 100644 (file)
@@ -89,6 +89,8 @@ struct trace {
        u64                     base_time;
        FILE                    *output;
        unsigned long           nr_events;
+       unsigned long           nr_events_printed;
+       unsigned long           max_events;
        struct strlist          *ev_qualifier;
        struct {
                size_t          nr;
@@ -612,6 +614,7 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
 
 struct syscall_arg_fmt {
        size_t     (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
+       unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
        void       *parm;
        const char *name;
        bool       show_zero;
@@ -723,6 +726,10 @@ static struct syscall_fmt {
          .arg = { [0] = { .scnprintf = SCA_HEX,        /* addr */ },
                   [2] = { .scnprintf = SCA_MMAP_PROT,  /* prot */ },
                   [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, },
+       { .name     = "mount",
+         .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ },
+                  [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */
+                          .mask_val  = SCAMV_MOUNT_FLAGS, /* flags */ }, }, },
        { .name     = "mprotect",
          .arg = { [0] = { .scnprintf = SCA_HEX,        /* start */ },
                   [2] = { .scnprintf = SCA_MMAP_PROT,  /* prot */ }, }, },
@@ -832,7 +839,8 @@ static struct syscall_fmt {
          .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
        { .name     = "tkill",
          .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
-       { .name     = "umount2", .alias = "umount", },
+       { .name     = "umount2", .alias = "umount",
+         .arg = { [0] = { .scnprintf = SCA_FILENAME, /* name */ }, }, },
        { .name     = "uname", .alias = "newuname", },
        { .name     = "unlinkat",
          .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
@@ -856,6 +864,18 @@ static struct syscall_fmt *syscall_fmt__find(const char *name)
        return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
 }
 
+static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
+{
+       int i, nmemb = ARRAY_SIZE(syscall_fmts);
+
+       for (i = 0; i < nmemb; ++i) {
+               if (syscall_fmts[i].alias && strcmp(syscall_fmts[i].alias, alias) == 0)
+                       return &syscall_fmts[i];
+       }
+
+       return NULL;
+}
+
 /*
  * is_exit: is this "exit" or "exit_group"?
  * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
@@ -1485,6 +1505,19 @@ static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
        return scnprintf(bf, size, "arg%d: ", arg->idx);
 }
 
+/*
+ * Check if the value is in fact zero, i.e. mask whatever needs masking, such
+ * as mount 'flags' argument that needs ignoring some magic flag, see comment
+ * in tools/perf/trace/beauty/mount_flags.c
+ */
+static unsigned long syscall__mask_val(struct syscall *sc, struct syscall_arg *arg, unsigned long val)
+{
+       if (sc->arg_fmt && sc->arg_fmt[arg->idx].mask_val)
+               return sc->arg_fmt[arg->idx].mask_val(arg, val);
+
+       return val;
+}
+
 static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
                                     struct syscall_arg *arg, unsigned long val)
 {
@@ -1533,6 +1566,11 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
                                continue;
 
                        val = syscall_arg__val(&arg, arg.idx);
+                       /*
+                        * Some syscall args need some mask, most don't and
+                        * return val untouched.
+                        */
+                       val = syscall__mask_val(sc, &arg, val);
 
                        /*
                         * Suppress this argument if its value is zero and
@@ -1664,6 +1702,8 @@ static int trace__printf_interrupted_entry(struct trace *trace)
        printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
        ttrace->entry_pending = false;
 
+       ++trace->nr_events_printed;
+
        return printed;
 }
 
@@ -1810,12 +1850,14 @@ static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evse
        int max_stack = evsel->attr.sample_max_stack ?
                        evsel->attr.sample_max_stack :
                        trace->max_stack;
+       int err;
 
-       if (machine__resolve(trace->host, &al, sample) < 0 ||
-           thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack))
+       if (machine__resolve(trace->host, &al, sample) < 0)
                return -1;
 
-       return 0;
+       err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack);
+       addr_location__put(&al);
+       return err;
 }
 
 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
@@ -1940,6 +1982,13 @@ errno_print: {
 
        fputc('\n', trace->output);
 
+       /*
+        * We only consider an 'event' for the sake of --max-events a non-filtered
+        * sys_enter + sys_exit and other tracepoint events.
+        */
+       if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX)
+               interrupted = true;
+
        if (callchain_ret > 0)
                trace__fprintf_callchain(trace, sample);
        else if (callchain_ret < 0)
@@ -2072,14 +2121,25 @@ static void bpf_output__fprintf(struct trace *trace,
 {
        binary__fprintf(sample->raw_data, sample->raw_size, 8,
                        bpf_output__printer, NULL, trace->output);
+       ++trace->nr_events_printed;
 }
 
 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
                                union perf_event *event __maybe_unused,
                                struct perf_sample *sample)
 {
-       struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+       struct thread *thread;
        int callchain_ret = 0;
+       /*
+        * Check if we called perf_evsel__disable(evsel) due to, for instance,
+        * this event's max_events having been hit and this is an entry coming
+        * from the ring buffer that we should discard, since the max events
+        * have already been considered/printed.
+        */
+       if (evsel->disabled)
+               return 0;
+
+       thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
 
        if (sample->callchain) {
                callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
@@ -2127,6 +2187,12 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
                        event_format__fprintf(evsel->tp_format, sample->cpu,
                                              sample->raw_data, sample->raw_size,
                                              trace->output);
+                       ++trace->nr_events_printed;
+
+                       if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
+                               perf_evsel__disable(evsel);
+                               perf_evsel__close(evsel);
+                       }
                }
        }
 
@@ -2137,8 +2203,8 @@ newline:
                trace__fprintf_callchain(trace, sample);
        else if (callchain_ret < 0)
                pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
-       thread__put(thread);
 out:
+       thread__put(thread);
        return 0;
 }
 
@@ -2225,6 +2291,8 @@ static int trace__pgfault(struct trace *trace,
                trace__fprintf_callchain(trace, sample);
        else if (callchain_ret < 0)
                pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+
+       ++trace->nr_events_printed;
 out:
        err = 0;
 out_put:
@@ -2402,6 +2470,9 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
                tracepoint_handler handler = evsel->handler;
                handler(trace, evsel, event, sample);
        }
+
+       if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX)
+               interrupted = true;
 }
 
 static int trace__add_syscall_newtp(struct trace *trace)
@@ -2706,7 +2777,7 @@ next_event:
                int timeout = done ? 100 : -1;
 
                if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
-                       if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
+                       if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
                                draining = true;
 
                        goto again;
@@ -3138,6 +3209,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
        int len = strlen(str) + 1, err = -1, list, idx;
        char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
        char group_name[PATH_MAX];
+       struct syscall_fmt *fmt;
 
        if (strace_groups_dir == NULL)
                return -1;
@@ -3155,12 +3227,19 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
                if (syscalltbl__id(trace->sctbl, s) >= 0 ||
                    syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
                        list = 1;
+                       goto do_concat;
+               }
+
+               fmt = syscall_fmt__find_by_alias(s);
+               if (fmt != NULL) {
+                       list = 1;
+                       s = fmt->name;
                } else {
                        path__join(group_name, sizeof(group_name), strace_groups_dir, s);
                        if (access(group_name, R_OK) == 0)
                                list = 1;
                }
-
+do_concat:
                if (lists[list]) {
                        sprintf(lists[list] + strlen(lists[list]), ",%s", s);
                } else {
@@ -3249,6 +3328,7 @@ int cmd_trace(int argc, const char **argv)
                .trace_syscalls = false,
                .kernel_syscallchains = false,
                .max_stack = UINT_MAX,
+               .max_events = ULONG_MAX,
        };
        const char *output_name = NULL;
        const struct option trace_options[] = {
@@ -3301,6 +3381,8 @@ int cmd_trace(int argc, const char **argv)
                     &record_parse_callchain_opt),
        OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
                    "Show the kernel callchains on the syscall exit path"),
+       OPT_ULONG(0, "max-events", &trace.max_events,
+               "Set the maximum number of events to print, exit after that is reached. "),
        OPT_UINTEGER(0, "min-stack", &trace.min_stack,
                     "Set the minimum stack depth when parsing the callchain, "
                     "anything below the specified depth will be ignored."),
index c72cc73a6b09a7c008eec2e19fda38e8924c5d0f..9531f7bd7d9bd9e114fd57840e4f8fb87a642610 100755 (executable)
@@ -5,6 +5,7 @@ HEADERS='
 include/uapi/drm/drm.h
 include/uapi/drm/i915_drm.h
 include/uapi/linux/fcntl.h
+include/uapi/linux/fs.h
 include/uapi/linux/kcmp.h
 include/uapi/linux/kvm.h
 include/uapi/linux/in.h
index 21bf7f5a3cf51a1a42e3169daa738c8e7e0a8d83..0ed4a34c74c4bc6d2c38457c05a59dd9f6ea8551 100644 (file)
@@ -81,6 +81,7 @@ struct record_opts {
        unsigned     initial_delay;
        bool         use_clockid;
        clockid_t    clockid;
+       u64          clockid_res_ns;
        unsigned int proc_map_timeout;
 };
 
diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py
deleted file mode 100644 (file)
index b494a67..0000000
+++ /dev/null
@@ -1,339 +0,0 @@
-#!/usr/bin/python2
-# call-graph-from-sql.py: create call-graph from sql database
-# Copyright (c) 2014-2017, Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-# more details.
-
-# To use this script you will need to have exported data using either the
-# export-to-sqlite.py or the export-to-postgresql.py script.  Refer to those
-# scripts for details.
-#
-# Following on from the example in the export scripts, a
-# call-graph can be displayed for the pt_example database like this:
-#
-#      python tools/perf/scripts/python/call-graph-from-sql.py pt_example
-#
-# Note that for PostgreSQL, this script supports connecting to remote databases
-# by setting hostname, port, username, password, and dbname e.g.
-#
-#      python tools/perf/scripts/python/call-graph-from-sql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example"
-#
-# The result is a GUI window with a tree representing a context-sensitive
-# call-graph.  Expanding a couple of levels of the tree and adjusting column
-# widths to suit will display something like:
-#
-#                                         Call Graph: pt_example
-# Call Path                          Object      Count   Time(ns)  Time(%)  Branch Count   Branch Count(%)
-# v- ls
-#     v- 2638:2638
-#         v- _start                  ld-2.19.so    1     10074071   100.0         211135            100.0
-#           |- unknown               unknown       1        13198     0.1              1              0.0
-#           >- _dl_start             ld-2.19.so    1      1400980    13.9          19637              9.3
-#           >- _d_linit_internal     ld-2.19.so    1       448152     4.4          11094              5.3
-#           v-__libc_start_main@plt  ls            1      8211741    81.5         180397             85.4
-#              >- _dl_fixup          ld-2.19.so    1         7607     0.1            108              0.1
-#              >- __cxa_atexit       libc-2.19.so  1        11737     0.1             10              0.0
-#              >- __libc_csu_init    ls            1        10354     0.1             10              0.0
-#              |- _setjmp            libc-2.19.so  1            0     0.0              4              0.0
-#              v- main               ls            1      8182043    99.6         180254             99.9
-#
-# Points to note:
-#      The top level is a command name (comm)
-#      The next level is a thread (pid:tid)
-#      Subsequent levels are functions
-#      'Count' is the number of calls
-#      'Time' is the elapsed time until the function returns
-#      Percentages are relative to the level above
-#      'Branch Count' is the total number of branches for that function and all
-#       functions that it calls
-
-import sys
-from PySide.QtCore import *
-from PySide.QtGui import *
-from PySide.QtSql import *
-from decimal import *
-
-class TreeItem():
-
-       def __init__(self, db, row, parent_item):
-               self.db = db
-               self.row = row
-               self.parent_item = parent_item
-               self.query_done = False;
-               self.child_count = 0
-               self.child_items = []
-               self.data = ["", "", "", "", "", "", ""]
-               self.comm_id = 0
-               self.thread_id = 0
-               self.call_path_id = 1
-               self.branch_count = 0
-               self.time = 0
-               if not parent_item:
-                       self.setUpRoot()
-
-       def setUpRoot(self):
-               self.query_done = True
-               query = QSqlQuery(self.db)
-               ret = query.exec_('SELECT id, comm FROM comms')
-               if not ret:
-                       raise Exception("Query failed: " + query.lastError().text())
-               while query.next():
-                       if not query.value(0):
-                               continue
-                       child_item = TreeItem(self.db, self.child_count, self)
-                       self.child_items.append(child_item)
-                       self.child_count += 1
-                       child_item.setUpLevel1(query.value(0), query.value(1))
-
-       def setUpLevel1(self, comm_id, comm):
-               self.query_done = True;
-               self.comm_id = comm_id
-               self.data[0] = comm
-               self.child_items = []
-               self.child_count = 0
-               query = QSqlQuery(self.db)
-               ret = query.exec_('SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id))
-               if not ret:
-                       raise Exception("Query failed: " + query.lastError().text())
-               while query.next():
-                       child_item = TreeItem(self.db, self.child_count, self)
-                       self.child_items.append(child_item)
-                       self.child_count += 1
-                       child_item.setUpLevel2(comm_id, query.value(0), query.value(1), query.value(2))
-
-       def setUpLevel2(self, comm_id, thread_id, pid, tid):
-               self.comm_id = comm_id
-               self.thread_id = thread_id
-               self.data[0] = str(pid) + ":" + str(tid)
-
-       def getChildItem(self, row):
-               return self.child_items[row]
-
-       def getParentItem(self):
-               return self.parent_item
-
-       def getRow(self):
-               return self.row
-
-       def timePercent(self, b):
-               if not self.time:
-                       return "0.0"
-               x = (b * Decimal(100)) / self.time
-               return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
-
-       def branchPercent(self, b):
-               if not self.branch_count:
-                       return "0.0"
-               x = (b * Decimal(100)) / self.branch_count
-               return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
-
-       def addChild(self, call_path_id, name, dso, count, time, branch_count):
-               child_item = TreeItem(self.db, self.child_count, self)
-               child_item.comm_id = self.comm_id
-               child_item.thread_id = self.thread_id
-               child_item.call_path_id = call_path_id
-               child_item.branch_count = branch_count
-               child_item.time = time
-               child_item.data[0] = name
-               if dso == "[kernel.kallsyms]":
-                       dso = "[kernel]"
-               child_item.data[1] = dso
-               child_item.data[2] = str(count)
-               child_item.data[3] = str(time)
-               child_item.data[4] = self.timePercent(time)
-               child_item.data[5] = str(branch_count)
-               child_item.data[6] = self.branchPercent(branch_count)
-               self.child_items.append(child_item)
-               self.child_count += 1
-
-       def selectCalls(self):
-               self.query_done = True;
-               query = QSqlQuery(self.db)
-               ret = query.exec_('SELECT id, call_path_id, branch_count, call_time, return_time, '
-                                 '( SELECT name FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ), '
-                                 '( SELECT short_name FROM dsos WHERE id = ( SELECT dso_id FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ) ), '
-                                 '( SELECT ip FROM call_paths where id = call_path_id ) '
-                                 'FROM calls WHERE parent_call_path_id = ' + str(self.call_path_id) + ' AND comm_id = ' + str(self.comm_id) + ' AND thread_id = ' + str(self.thread_id) +
-                                 ' ORDER BY call_path_id')
-               if not ret:
-                       raise Exception("Query failed: " + query.lastError().text())
-               last_call_path_id = 0
-               name = ""
-               dso = ""
-               count = 0
-               branch_count = 0
-               total_branch_count = 0
-               time = 0
-               total_time = 0
-               while query.next():
-                       if query.value(1) == last_call_path_id:
-                               count += 1
-                               branch_count += query.value(2)
-                               time += query.value(4) - query.value(3)
-                       else:
-                               if count:
-                                       self.addChild(last_call_path_id, name, dso, count, time, branch_count)
-                               last_call_path_id = query.value(1)
-                               name = query.value(5)
-                               dso = query.value(6)
-                               count = 1
-                               total_branch_count += branch_count
-                               total_time += time
-                               branch_count = query.value(2)
-                               time = query.value(4) - query.value(3)
-               if count:
-                       self.addChild(last_call_path_id, name, dso, count, time, branch_count)
-               total_branch_count += branch_count
-               total_time += time
-               # Top level does not have time or branch count, so fix that here
-               if total_branch_count > self.branch_count:
-                       self.branch_count = total_branch_count
-                       if self.branch_count:
-                               for child_item in self.child_items:
-                                       child_item.data[6] = self.branchPercent(child_item.branch_count)
-               if total_time > self.time:
-                       self.time = total_time
-                       if self.time:
-                               for child_item in self.child_items:
-                                       child_item.data[4] = self.timePercent(child_item.time)
-
-       def childCount(self):
-               if not self.query_done:
-                       self.selectCalls()
-               return self.child_count
-
-       def columnCount(self):
-               return 7
-
-       def columnHeader(self, column):
-               headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
-               return headers[column]
-
-       def getData(self, column):
-               return self.data[column]
-
-class TreeModel(QAbstractItemModel):
-
-       def __init__(self, db, parent=None):
-               super(TreeModel, self).__init__(parent)
-               self.db = db
-               self.root = TreeItem(db, 0, None)
-
-       def columnCount(self, parent):
-               return self.root.columnCount()
-
-       def rowCount(self, parent):
-               if parent.isValid():
-                       parent_item = parent.internalPointer()
-               else:
-                       parent_item = self.root
-               return parent_item.childCount()
-
-       def headerData(self, section, orientation, role):
-               if role == Qt.TextAlignmentRole:
-                       if section > 1:
-                               return Qt.AlignRight
-               if role != Qt.DisplayRole:
-                       return None
-               if orientation != Qt.Horizontal:
-                       return None
-               return self.root.columnHeader(section)
-
-       def parent(self, child):
-               child_item = child.internalPointer()
-               if child_item is self.root:
-                       return QModelIndex()
-               parent_item = child_item.getParentItem()
-               return self.createIndex(parent_item.getRow(), 0, parent_item)
-
-       def index(self, row, column, parent):
-               if parent.isValid():
-                       parent_item = parent.internalPointer()
-               else:
-                       parent_item = self.root
-               child_item = parent_item.getChildItem(row)
-               return self.createIndex(row, column, child_item)
-
-       def data(self, index, role):
-               if role == Qt.TextAlignmentRole:
-                       if index.column() > 1:
-                               return Qt.AlignRight
-               if role != Qt.DisplayRole:
-                       return None
-               index_item = index.internalPointer()
-               return index_item.getData(index.column())
-
-class MainWindow(QMainWindow):
-
-       def __init__(self, db, dbname, parent=None):
-               super(MainWindow, self).__init__(parent)
-
-               self.setObjectName("MainWindow")
-               self.setWindowTitle("Call Graph: " + dbname)
-               self.move(100, 100)
-               self.resize(800, 600)
-               style = self.style()
-               icon = style.standardIcon(QStyle.SP_MessageBoxInformation)
-               self.setWindowIcon(icon);
-
-               self.model = TreeModel(db)
-
-               self.view = QTreeView()
-               self.view.setModel(self.model)
-
-               self.setCentralWidget(self.view)
-
-if __name__ == '__main__':
-       if (len(sys.argv) < 2):
-               print >> sys.stderr, "Usage is: call-graph-from-sql.py <database name>"
-               raise Exception("Too few arguments")
-
-       dbname = sys.argv[1]
-
-       is_sqlite3 = False
-       try:
-               f = open(dbname)
-               if f.read(15) == "SQLite format 3":
-                       is_sqlite3 = True
-               f.close()
-       except:
-               pass
-
-       if is_sqlite3:
-               db = QSqlDatabase.addDatabase('QSQLITE')
-       else:
-               db = QSqlDatabase.addDatabase('QPSQL')
-               opts = dbname.split()
-               for opt in opts:
-                       if '=' in opt:
-                               opt = opt.split('=')
-                               if opt[0] == 'hostname':
-                                       db.setHostName(opt[1])
-                               elif opt[0] == 'port':
-                                       db.setPort(int(opt[1]))
-                               elif opt[0] == 'username':
-                                       db.setUserName(opt[1])
-                               elif opt[0] == 'password':
-                                       db.setPassword(opt[1])
-                               elif opt[0] == 'dbname':
-                                       dbname = opt[1]
-                       else:
-                               dbname = opt
-
-       db.setDatabaseName(dbname)
-       if not db.open():
-               raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
-
-       app = QApplication(sys.argv)
-       window = MainWindow(db, dbname)
-       window.show()
-       err = app.exec_()
-       db.close()
-       sys.exit(err)
index e46f51b1751310a5263283fa9a9b5f9227ed1e52..0564dd7377f22f098d98a254bc028949628dd824 100644 (file)
@@ -59,7 +59,7 @@ import datetime
 #      pt_example=# \q
 #
 # An example of using the database is provided by the script
-# call-graph-from-sql.py.  Refer to that script for details.
+# exported-sql-viewer.py.  Refer to that script for details.
 #
 # Tables:
 #
index e4bb82c8aba9e835ea4712e5c00d43443e1d9a9a..245caf2643ed1c4548549be6a48426e1830ab5e1 100644 (file)
@@ -40,7 +40,7 @@ import datetime
 #      sqlite> .quit
 #
 # An example of using the database is provided by the script
-# call-graph-from-sql.py.  Refer to that script for details.
+# exported-sql-viewer.py.  Refer to that script for details.
 #
 # The database structure is practically the same as created by the script
 # export-to-postgresql.py. Refer to that script for details.  A notable
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
new file mode 100755 (executable)
index 0000000..24cb0bd
--- /dev/null
@@ -0,0 +1,2128 @@
+#!/usr/bin/python2
+# SPDX-License-Identifier: GPL-2.0
+# exported-sql-viewer.py: view data from sql database
+# Copyright (c) 2014-2018, Intel Corporation.
+
+# To use this script you will need to have exported data using either the
+# export-to-sqlite.py or the export-to-postgresql.py script.  Refer to those
+# scripts for details.
+#
+# Following on from the example in the export scripts, a
+# call-graph can be displayed for the pt_example database like this:
+#
+#      python tools/perf/scripts/python/exported-sql-viewer.py pt_example
+#
+# Note that for PostgreSQL, this script supports connecting to remote databases
+# by setting hostname, port, username, password, and dbname e.g.
+#
+#      python tools/perf/scripts/python/exported-sql-viewer.py "hostname=myhost username=myuser password=mypassword dbname=pt_example"
+#
+# The result is a GUI window with a tree representing a context-sensitive
+# call-graph.  Expanding a couple of levels of the tree and adjusting column
+# widths to suit will display something like:
+#
+#                                         Call Graph: pt_example
+# Call Path                          Object      Count   Time(ns)  Time(%)  Branch Count   Branch Count(%)
+# v- ls
+#     v- 2638:2638
+#         v- _start                  ld-2.19.so    1     10074071   100.0         211135            100.0
+#           |- unknown               unknown       1        13198     0.1              1              0.0
+#           >- _dl_start             ld-2.19.so    1      1400980    13.9          19637              9.3
+#           >- _dl_init_internal     ld-2.19.so    1       448152     4.4          11094              5.3
+#           v-__libc_start_main@plt  ls            1      8211741    81.5         180397             85.4
+#              >- _dl_fixup          ld-2.19.so    1         7607     0.1            108              0.1
+#              >- __cxa_atexit       libc-2.19.so  1        11737     0.1             10              0.0
+#              >- __libc_csu_init    ls            1        10354     0.1             10              0.0
+#              |- _setjmp            libc-2.19.so  1            0     0.0              4              0.0
+#              v- main               ls            1      8182043    99.6         180254             99.9
+#
+# Points to note:
+#      The top level is a command name (comm)
+#      The next level is a thread (pid:tid)
+#      Subsequent levels are functions
+#      'Count' is the number of calls
+#      'Time' is the elapsed time until the function returns
+#      Percentages are relative to the level above
+#      'Branch Count' is the total number of branches for that function and all
+#       functions that it calls
+
+# There is also an "All branches" report, which displays branches and
+# possibly disassembly.  However, presently, the only supported disassembler is
+# Intel XED, and additionally the object code must be present in perf build ID
+# cache. To use Intel XED, libxed.so must be present. To build and install
+# libxed.so:
+#            git clone https://github.com/intelxed/mbuild.git mbuild
+#            git clone https://github.com/intelxed/xed
+#            cd xed
+#            ./mfile.py --share
+#            sudo ./mfile.py --prefix=/usr/local install
+#            sudo ldconfig
+#
+# Example report:
+#
+# Time           CPU  Command  PID    TID    Branch Type            In Tx  Branch
+# 8107675239590  2    ls       22011  22011  return from interrupt  No     ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea260 _start (ld-2.19.so)
+#                                                                              7fab593ea260 48 89 e7                                        mov %rsp, %rdi
+# 8107675239899  2    ls       22011  22011  hardware interrupt     No         7fab593ea260 _start (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel])
+# 8107675241900  2    ls       22011  22011  return from interrupt  No     ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea260 _start (ld-2.19.so)
+#                                                                              7fab593ea260 48 89 e7                                        mov %rsp, %rdi
+#                                                                              7fab593ea263 e8 c8 06 00 00                                  callq  0x7fab593ea930
+# 8107675241900  2    ls       22011  22011  call                   No         7fab593ea263 _start+0x3 (ld-2.19.so) -> 7fab593ea930 _dl_start (ld-2.19.so)
+#                                                                              7fab593ea930 55                                              pushq  %rbp
+#                                                                              7fab593ea931 48 89 e5                                        mov %rsp, %rbp
+#                                                                              7fab593ea934 41 57                                           pushq  %r15
+#                                                                              7fab593ea936 41 56                                           pushq  %r14
+#                                                                              7fab593ea938 41 55                                           pushq  %r13
+#                                                                              7fab593ea93a 41 54                                           pushq  %r12
+#                                                                              7fab593ea93c 53                                              pushq  %rbx
+#                                                                              7fab593ea93d 48 89 fb                                        mov %rdi, %rbx
+#                                                                              7fab593ea940 48 83 ec 68                                     sub $0x68, %rsp
+#                                                                              7fab593ea944 0f 31                                           rdtsc
+#                                                                              7fab593ea946 48 c1 e2 20                                     shl $0x20, %rdx
+#                                                                              7fab593ea94a 89 c0                                           mov %eax, %eax
+#                                                                              7fab593ea94c 48 09 c2                                        or %rax, %rdx
+#                                                                              7fab593ea94f 48 8b 05 1a 15 22 00                            movq  0x22151a(%rip), %rax
+# 8107675242232  2    ls       22011  22011  hardware interrupt     No         7fab593ea94f _dl_start+0x1f (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel])
+# 8107675242900  2    ls       22011  22011  return from interrupt  No     ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea94f _dl_start+0x1f (ld-2.19.so)
+#                                                                              7fab593ea94f 48 8b 05 1a 15 22 00                            movq  0x22151a(%rip), %rax
+#                                                                              7fab593ea956 48 89 15 3b 13 22 00                            movq  %rdx, 0x22133b(%rip)
+# 8107675243232  2    ls       22011  22011  hardware interrupt     No         7fab593ea956 _dl_start+0x26 (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel])
+
+import sys
+import weakref
+import threading
+import string
+import cPickle
+import re
+import os
+from PySide.QtCore import *
+from PySide.QtGui import *
+from PySide.QtSql import *
+from decimal import *
+from ctypes import *
+from multiprocessing import Process, Array, Value, Event
+
+# Data formatting helpers
+
+def tohex(ip):
+       if ip < 0:
+               ip += 1 << 64
+       return "%x" % ip
+
+def offstr(offset):
+       if offset:
+               return "+0x%x" % offset
+       return ""
+
+def dsoname(name):
+       if name == "[kernel.kallsyms]":
+               return "[kernel]"
+       return name
+
+# Percent to one decimal place
+
+def PercentToOneDP(n, d):
+       if not d:
+               return "0.0"
+       x = (n * Decimal(100)) / d
+       return str(x.quantize(Decimal(".1"), rounding=ROUND_HALF_UP))
+
+# Helper for queries that must not fail
+
+def QueryExec(query, stmt):
+       ret = query.exec_(stmt)
+       if not ret:
+               raise Exception("Query failed: " + query.lastError().text())
+
+# Background thread
+
+class Thread(QThread):
+
+       done = Signal(object)
+
+       def __init__(self, task, param=None, parent=None):
+               super(Thread, self).__init__(parent)
+               self.task = task
+               self.param = param
+
+       def run(self):
+               while True:
+                       if self.param is None:
+                               done, result = self.task()
+                       else:
+                               done, result = self.task(self.param)
+                       self.done.emit(result)
+                       if done:
+                               break
+
+# Tree data model
+
+class TreeModel(QAbstractItemModel):
+
+       def __init__(self, root, parent=None):
+               super(TreeModel, self).__init__(parent)
+               self.root = root
+               self.last_row_read = 0
+
+       def Item(self, parent):
+               if parent.isValid():
+                       return parent.internalPointer()
+               else:
+                       return self.root
+
+       def rowCount(self, parent):
+               result = self.Item(parent).childCount()
+               if result < 0:
+                       result = 0
+                       self.dataChanged.emit(parent, parent)
+               return result
+
+       def hasChildren(self, parent):
+               return self.Item(parent).hasChildren()
+
+       def headerData(self, section, orientation, role):
+               if role == Qt.TextAlignmentRole:
+                       return self.columnAlignment(section)
+               if role != Qt.DisplayRole:
+                       return None
+               if orientation != Qt.Horizontal:
+                       return None
+               return self.columnHeader(section)
+
+       def parent(self, child):
+               child_item = child.internalPointer()
+               if child_item is self.root:
+                       return QModelIndex()
+               parent_item = child_item.getParentItem()
+               return self.createIndex(parent_item.getRow(), 0, parent_item)
+
+       def index(self, row, column, parent):
+               child_item = self.Item(parent).getChildItem(row)
+               return self.createIndex(row, column, child_item)
+
+       def DisplayData(self, item, index):
+               return item.getData(index.column())
+
+       def FetchIfNeeded(self, row):
+               if row > self.last_row_read:
+                       self.last_row_read = row
+                       if row + 10 >= self.root.child_count:
+                               self.fetcher.Fetch(glb_chunk_sz)
+
+       def columnAlignment(self, column):
+               return Qt.AlignLeft
+
+       def columnFont(self, column):
+               return None
+
+       def data(self, index, role):
+               if role == Qt.TextAlignmentRole:
+                       return self.columnAlignment(index.column())
+               if role == Qt.FontRole:
+                       return self.columnFont(index.column())
+               if role != Qt.DisplayRole:
+                       return None
+               item = index.internalPointer()
+               return self.DisplayData(item, index)
+
+# Table data model
+
+class TableModel(QAbstractTableModel):
+
+       def __init__(self, parent=None):
+               super(TableModel, self).__init__(parent)
+               self.child_count = 0
+               self.child_items = []
+               self.last_row_read = 0
+
+       def Item(self, parent):
+               if parent.isValid():
+                       return parent.internalPointer()
+               else:
+                       return self
+
+       def rowCount(self, parent):
+               return self.child_count
+
+       def headerData(self, section, orientation, role):
+               if role == Qt.TextAlignmentRole:
+                       return self.columnAlignment(section)
+               if role != Qt.DisplayRole:
+                       return None
+               if orientation != Qt.Horizontal:
+                       return None
+               return self.columnHeader(section)
+
+       def index(self, row, column, parent):
+               return self.createIndex(row, column, self.child_items[row])
+
+       def DisplayData(self, item, index):
+               return item.getData(index.column())
+
+       def FetchIfNeeded(self, row):
+               if row > self.last_row_read:
+                       self.last_row_read = row
+                       if row + 10 >= self.child_count:
+                               self.fetcher.Fetch(glb_chunk_sz)
+
+       def columnAlignment(self, column):
+               return Qt.AlignLeft
+
+       def columnFont(self, column):
+               return None
+
+       def data(self, index, role):
+               if role == Qt.TextAlignmentRole:
+                       return self.columnAlignment(index.column())
+               if role == Qt.FontRole:
+                       return self.columnFont(index.column())
+               if role != Qt.DisplayRole:
+                       return None
+               item = index.internalPointer()
+               return self.DisplayData(item, index)
+
+# Model cache
+
+model_cache = weakref.WeakValueDictionary()
+model_cache_lock = threading.Lock()
+
+def LookupCreateModel(model_name, create_fn):
+       model_cache_lock.acquire()
+       try:
+               model = model_cache[model_name]
+       except:
+               model = None
+       if model is None:
+               model = create_fn()
+               model_cache[model_name] = model
+       model_cache_lock.release()
+       return model
+
+# Find bar
+
+class FindBar():
+
+       def __init__(self, parent, finder, is_reg_expr=False):
+               self.finder = finder
+               self.context = []
+               self.last_value = None
+               self.last_pattern = None
+
+               label = QLabel("Find:")
+               label.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+               self.textbox = QComboBox()
+               self.textbox.setEditable(True)
+               self.textbox.currentIndexChanged.connect(self.ValueChanged)
+
+               self.progress = QProgressBar()
+               self.progress.setRange(0, 0)
+               self.progress.hide()
+
+               if is_reg_expr:
+                       self.pattern = QCheckBox("Regular Expression")
+               else:
+                       self.pattern = QCheckBox("Pattern")
+               self.pattern.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+               self.next_button = QToolButton()
+               self.next_button.setIcon(parent.style().standardIcon(QStyle.SP_ArrowDown))
+               self.next_button.released.connect(lambda: self.NextPrev(1))
+
+               self.prev_button = QToolButton()
+               self.prev_button.setIcon(parent.style().standardIcon(QStyle.SP_ArrowUp))
+               self.prev_button.released.connect(lambda: self.NextPrev(-1))
+
+               self.close_button = QToolButton()
+               self.close_button.setIcon(parent.style().standardIcon(QStyle.SP_DockWidgetCloseButton))
+               self.close_button.released.connect(self.Deactivate)
+
+               self.hbox = QHBoxLayout()
+               self.hbox.setContentsMargins(0, 0, 0, 0)
+
+               self.hbox.addWidget(label)
+               self.hbox.addWidget(self.textbox)
+               self.hbox.addWidget(self.progress)
+               self.hbox.addWidget(self.pattern)
+               self.hbox.addWidget(self.next_button)
+               self.hbox.addWidget(self.prev_button)
+               self.hbox.addWidget(self.close_button)
+
+               self.bar = QWidget()
+               self.bar.setLayout(self.hbox);
+               self.bar.hide()
+
+       def Widget(self):
+               return self.bar
+
+       def Activate(self):
+               self.bar.show()
+               self.textbox.setFocus()
+
+       def Deactivate(self):
+               self.bar.hide()
+
+       def Busy(self):
+               self.textbox.setEnabled(False)
+               self.pattern.hide()
+               self.next_button.hide()
+               self.prev_button.hide()
+               self.progress.show()
+
+       def Idle(self):
+               self.textbox.setEnabled(True)
+               self.progress.hide()
+               self.pattern.show()
+               self.next_button.show()
+               self.prev_button.show()
+
+       def Find(self, direction):
+               value = self.textbox.currentText()
+               pattern = self.pattern.isChecked()
+               self.last_value = value
+               self.last_pattern = pattern
+               self.finder.Find(value, direction, pattern, self.context)
+
+       def ValueChanged(self):
+               value = self.textbox.currentText()
+               pattern = self.pattern.isChecked()
+               index = self.textbox.currentIndex()
+               data = self.textbox.itemData(index)
+               # Store the pattern in the combo box to keep it with the text value
+               if data == None:
+                       self.textbox.setItemData(index, pattern)
+               else:
+                       self.pattern.setChecked(data)
+               self.Find(0)
+
+       def NextPrev(self, direction):
+               value = self.textbox.currentText()
+               pattern = self.pattern.isChecked()
+               if value != self.last_value:
+                       index = self.textbox.findText(value)
+                       # Allow for a button press before the value has been added to the combo box
+                       if index < 0:
+                               index = self.textbox.count()
+                               self.textbox.addItem(value, pattern)
+                               self.textbox.setCurrentIndex(index)
+                               return
+                       else:
+                               self.textbox.setItemData(index, pattern)
+               elif pattern != self.last_pattern:
+                       # Keep the pattern recorded in the combo box up to date
+                       index = self.textbox.currentIndex()
+                       self.textbox.setItemData(index, pattern)
+               self.Find(direction)
+
+       def NotFound(self):
+               QMessageBox.information(self.bar, "Find", "'" + self.textbox.currentText() + "' not found")
+
+# Context-sensitive call graph data model item base
+
+class CallGraphLevelItemBase(object):
+
+       def __init__(self, glb, row, parent_item):
+               self.glb = glb
+               self.row = row
+               self.parent_item = parent_item
+               self.query_done = False;
+               self.child_count = 0
+               self.child_items = []
+
+       def getChildItem(self, row):
+               return self.child_items[row]
+
+       def getParentItem(self):
+               return self.parent_item
+
+       def getRow(self):
+               return self.row
+
+       def childCount(self):
+               if not self.query_done:
+                       self.Select()
+                       if not self.child_count:
+                               return -1
+               return self.child_count
+
+       def hasChildren(self):
+               if not self.query_done:
+                       return True
+               return self.child_count > 0
+
+       def getData(self, column):
+               return self.data[column]
+
+# Context-sensitive call graph data model level 2+ item base
+
+class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase):
+
+       def __init__(self, glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item):
+               super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, row, parent_item)
+               self.comm_id = comm_id
+               self.thread_id = thread_id
+               self.call_path_id = call_path_id
+               self.branch_count = branch_count
+               self.time = time
+
+       def Select(self):
+               self.query_done = True;
+               query = QSqlQuery(self.glb.db)
+               QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time), SUM(branch_count)"
+                                       " FROM calls"
+                                       " INNER JOIN call_paths ON calls.call_path_id = call_paths.id"
+                                       " INNER JOIN symbols ON call_paths.symbol_id = symbols.id"
+                                       " INNER JOIN dsos ON symbols.dso_id = dsos.id"
+                                       " WHERE parent_call_path_id = " + str(self.call_path_id) +
+                                       " AND comm_id = " + str(self.comm_id) +
+                                       " AND thread_id = " + str(self.thread_id) +
+                                       " GROUP BY call_path_id, name, short_name"
+                                       " ORDER BY call_path_id")
+               while query.next():
+                       child_item = CallGraphLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self)
+                       self.child_items.append(child_item)
+                       self.child_count += 1
+
+# Context-sensitive call graph data model level three item
+
+class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase):
+
+       def __init__(self, glb, row, comm_id, thread_id, call_path_id, name, dso, count, time, branch_count, parent_item):
+               super(CallGraphLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item)
+               dso = dsoname(dso)
+               self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ]
+               self.dbid = call_path_id
+
+# Context-sensitive call graph data model level two item
+
+class CallGraphLevelTwoItem(CallGraphLevelTwoPlusItemBase):
+
+       def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item):
+               super(CallGraphLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 1, 0, 0, parent_item)
+               self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""]
+               self.dbid = thread_id
+
+       def Select(self):
+               super(CallGraphLevelTwoItem, self).Select()
+               for child_item in self.child_items:
+                       self.time += child_item.time
+                       self.branch_count += child_item.branch_count
+               for child_item in self.child_items:
+                       child_item.data[4] = PercentToOneDP(child_item.time, self.time)
+                       child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count)
+
+# Context-sensitive call graph data model level one item
+
+class CallGraphLevelOneItem(CallGraphLevelItemBase):
+
+       def __init__(self, glb, row, comm_id, comm, parent_item):
+               super(CallGraphLevelOneItem, self).__init__(glb, row, parent_item)
+               self.data = [comm, "", "", "", "", "", ""]
+               self.dbid = comm_id
+
+       def Select(self):
+               self.query_done = True;
+               query = QSqlQuery(self.glb.db)
+               QueryExec(query, "SELECT thread_id, pid, tid"
+                                       " FROM comm_threads"
+                                       " INNER JOIN threads ON thread_id = threads.id"
+                                       " WHERE comm_id = " + str(self.dbid))
+               while query.next():
+                       child_item = CallGraphLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self)
+                       self.child_items.append(child_item)
+                       self.child_count += 1
+
+# Context-sensitive call graph data model root item
+
+class CallGraphRootItem(CallGraphLevelItemBase):
+
+       def __init__(self, glb):
+               super(CallGraphRootItem, self).__init__(glb, 0, None)
+               self.dbid = 0
+               self.query_done = True;
+               query = QSqlQuery(glb.db)
+               QueryExec(query, "SELECT id, comm FROM comms")
+               while query.next():
+                       if not query.value(0):
+                               continue
+                       child_item = CallGraphLevelOneItem(glb, self.child_count, query.value(0), query.value(1), self)
+                       self.child_items.append(child_item)
+                       self.child_count += 1
+
+# Context-sensitive call graph data model
+
+class CallGraphModel(TreeModel):
+
+       def __init__(self, glb, parent=None):
+               super(CallGraphModel, self).__init__(CallGraphRootItem(glb), parent)
+               self.glb = glb
+
+       def columnCount(self, parent=None):
+               return 7
+
+       def columnHeader(self, column):
+               headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
+               return headers[column]
+
+       def columnAlignment(self, column):
+               alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ]
+               return alignment[column]
+
+       def FindSelect(self, value, pattern, query):
+               if pattern:
+                       # postgresql and sqlite pattern matching differences:
+                       #   postgresql LIKE is case sensitive but sqlite LIKE is not
+                       #   postgresql LIKE allows % and _ to be escaped with \ but sqlite LIKE does not
+                       #   postgresql supports ILIKE which is case insensitive
+                       #   sqlite supports GLOB (text only) which uses * and ? and is case sensitive
+                       if not self.glb.dbref.is_sqlite3:
+                               # Escape % and _
+                               s = value.replace("%", "\%")
+                               s = s.replace("_", "\_")
+                               # Translate * and ? into SQL LIKE pattern characters % and _
+                               trans = string.maketrans("*?", "%_")
+                               match = " LIKE '" + str(s).translate(trans) + "'"
+                       else:
+                               match = " GLOB '" + str(value) + "'"
+               else:
+                       match = " = '" + str(value) + "'"
+               QueryExec(query, "SELECT call_path_id, comm_id, thread_id"
+                                               " FROM calls"
+                                               " INNER JOIN call_paths ON calls.call_path_id = call_paths.id"
+                                               " INNER JOIN symbols ON call_paths.symbol_id = symbols.id"
+                                               " WHERE symbols.name" + match +
+                                               " GROUP BY comm_id, thread_id, call_path_id"
+                                               " ORDER BY comm_id, thread_id, call_path_id")
+
+       def FindPath(self, query):
+               # Turn the query result into a list of ids that the tree view can walk
+               # to open the tree at the right place.
+               ids = []
+               parent_id = query.value(0)
+               while parent_id:
+                       ids.insert(0, parent_id)
+                       q2 = QSqlQuery(self.glb.db)
+                       QueryExec(q2, "SELECT parent_id"
+                                       " FROM call_paths"
+                                       " WHERE id = " + str(parent_id))
+                       if not q2.next():
+                               break
+                       parent_id = q2.value(0)
+               # The call path root is not used
+               if ids[0] == 1:
+                       del ids[0]
+               ids.insert(0, query.value(2))
+               ids.insert(0, query.value(1))
+               return ids
+
+       def Found(self, query, found):
+               if found:
+                       return self.FindPath(query)
+               return []
+
+       def FindValue(self, value, pattern, query, last_value, last_pattern):
+               if last_value == value and pattern == last_pattern:
+                       found = query.first()
+               else:
+                       self.FindSelect(value, pattern, query)
+                       found = query.next()
+               return self.Found(query, found)
+
+       def FindNext(self, query):
+               found = query.next()
+               if not found:
+                       found = query.first()
+               return self.Found(query, found)
+
+       def FindPrev(self, query):
+               found = query.previous()
+               if not found:
+                       found = query.last()
+               return self.Found(query, found)
+
+       def FindThread(self, c):
+               if c.direction == 0 or c.value != c.last_value or c.pattern != c.last_pattern:
+                       ids = self.FindValue(c.value, c.pattern, c.query, c.last_value, c.last_pattern)
+               elif c.direction > 0:
+                       ids = self.FindNext(c.query)
+               else:
+                       ids = self.FindPrev(c.query)
+               return (True, ids)
+
+	# Start an asynchronous find. 'context' is a caller-owned, one-element
+	# list that preserves the query and the previous search value/pattern
+	# between calls, so repeated finds can step through results instead of
+	# re-running the SELECT. 'callback' receives the resulting ids.
+	def Find(self, value, direction, pattern, context, callback):
+		class Context():
+			def __init__(self, *x):
+				self.value, self.direction, self.pattern, self.query, self.last_value, self.last_pattern = x
+			def Update(self, *x):
+				self.value, self.direction, self.pattern, self.last_value, self.last_pattern = x + (self.value, self.pattern)
+		if len(context):
+			context[0].Update(value, direction, pattern)
+		else:
+			context.append(Context(value, direction, pattern, QSqlQuery(self.glb.db), None, None))
+		# Use a thread so the UI is not blocked during the SELECT
+		thread = Thread(self.FindThread, context[0])
+		thread.done.connect(lambda ids, t=thread, c=callback: self.FindDone(t, c, ids), Qt.QueuedConnection)
+		thread.start()
+
+	# Runs on the main thread via the find thread's done signal; passes the
+	# result ids on to the window's callback
+	def FindDone(self, thread, callback, ids):
+		callback(ids)
+
+# Vertical widget layout
+
+class VBox():
+
+       def __init__(self, w1, w2, w3=None):
+               self.vbox = QWidget()
+               self.vbox.setLayout(QVBoxLayout());
+
+               self.vbox.layout().setContentsMargins(0, 0, 0, 0)
+
+               self.vbox.layout().addWidget(w1)
+               self.vbox.layout().addWidget(w2)
+               if w3:
+                       self.vbox.layout().addWidget(w3)
+
+       def Widget(self):
+               return self.vbox
+
+# Context-sensitive call graph window
+
+class CallGraphWindow(QMdiSubWindow):
+
+	# MDI sub-window showing the context-sensitive call graph in a tree
+	# view with a find bar underneath
+	def __init__(self, glb, parent=None):
+		super(CallGraphWindow, self).__init__(parent)
+
+		# Share one model between windows showing the same data
+		self.model = LookupCreateModel("Context-Sensitive Call Graph", lambda x=glb: CallGraphModel(x))
+
+		self.view = QTreeView()
+		self.view.setModel(self.model)
+
+		# Fixed initial column widths
+		for c, w in ((0, 250), (1, 100), (2, 60), (3, 70), (4, 70), (5, 100)):
+			self.view.setColumnWidth(c, w)
+
+		self.find_bar = FindBar(self, self)
+
+		self.vbox = VBox(self.view, self.find_bar.Widget())
+
+		self.setWidget(self.vbox.Widget())
+
+		AddSubWindow(glb.mainwindow.mdi_area, self, "Context-Sensitive Call Graph")
+
+	# Walk down the tree selecting each id in 'ids' in turn (each id is one
+	# level deeper). Returns False if any id along the path cannot be found.
+	def DisplayFound(self, ids):
+		if not len(ids):
+			return False
+		parent = QModelIndex()
+		for dbid in ids:
+			found = False
+			n = self.model.rowCount(parent)
+			for row in xrange(n):
+				child = self.model.index(row, 0, parent)
+				if child.internalPointer().dbid == dbid:
+					found = True
+					self.view.setCurrentIndex(child)
+					parent = child
+					break
+			if not found:
+				break
+		return found
+
+	# Called by the find bar: start an asynchronous find in the model
+	def Find(self, value, direction, pattern, context):
+		self.view.setFocus()
+		self.find_bar.Busy()
+		self.model.Find(value, direction, pattern, context, self.FindDone)
+
+	# Completion callback for Find(): display the result or report not found
+	def FindDone(self, ids):
+		found = True
+		if not self.DisplayFound(ids):
+			found = False
+		self.find_bar.Idle()
+		if not found:
+			self.find_bar.NotFound()
+
+# Child data item finder
+
+class ChildDataItemFinder():
+
+	# Finds matching rows among the children of 'root' by searching the
+	# text of every column, running on a thread. The matching row numbers
+	# are remembered so subsequent finds in either direction just step
+	# through them.
+	def __init__(self, root):
+		self.root = root
+		self.value, self.direction, self.pattern, self.last_value, self.last_pattern = (None,) * 5
+		self.rows = []
+		self.pos = 0
+
+	# Rebuild the list of matching row numbers. With 'pattern' true the
+	# search value is treated as a regular expression, otherwise as a plain
+	# substring. A row matches if any column's string form matches.
+	def FindSelect(self):
+		self.rows = []
+		if self.pattern:
+			pattern = re.compile(self.value)
+			for child in self.root.child_items:
+				for column_data in child.data:
+					if re.search(pattern, str(column_data)) is not None:
+						self.rows.append(child.row)
+						break
+		else:
+			for child in self.root.child_items:
+				for column_data in child.data:
+					if self.value in str(column_data):
+						self.rows.append(child.row)
+						break
+
+	# Return the first matching row, re-running the search if the value or
+	# pattern mode changed, or -1 if there are no matches
+	def FindValue(self):
+		self.pos = 0
+		if self.last_value != self.value or self.pattern != self.last_pattern:
+			self.FindSelect()
+		if not len(self.rows):
+			return -1
+		return self.rows[self.pos]
+
+	# Thread body: start a new search, or step forwards/backwards through
+	# the remembered matches wrapping at either end. Returns (done, row)
+	# for the Thread done signal, with row -1 meaning not found.
+	def FindThread(self):
+		if self.direction == 0 or self.value != self.last_value or self.pattern != self.last_pattern:
+			row = self.FindValue()
+		elif len(self.rows):
+			if self.direction > 0:
+				self.pos += 1
+				if self.pos >= len(self.rows):
+					self.pos = 0
+			else:
+				self.pos -= 1
+				if self.pos < 0:
+					self.pos = len(self.rows) - 1
+			row = self.rows[self.pos]
+		else:
+			row = -1
+		return (True, row)
+
+	# Start an asynchronous find among the root's children; 'callback'
+	# receives the matching row number or -1
+	def Find(self, value, direction, pattern, context, callback):
+		self.value, self.direction, self.pattern, self.last_value, self.last_pattern = (value, direction,pattern, self.value, self.pattern)
+		# Use a thread so the UI is not blocked
+		thread = Thread(self.FindThread)
+		thread.done.connect(lambda row, t=thread, c=callback: self.FindDone(t, c, row), Qt.QueuedConnection)
+		thread.start()
+
+	# Runs on the main thread via the find thread's done signal
+	def FindDone(self, thread, callback, row):
+		callback(row)
+
+# Number of database records to fetch in one go
+
+glb_chunk_sz = 10000
+
+# Size in bytes of a pickled integer big enough to hold a record size
+# (used as the fixed-width length prefix in the fetcher's ring buffer)
+
+glb_nsz = 8
+
+# Background process for SQL data fetcher
+
+class SQLFetcherProcess():
+
+	# Runs in a separate process: executes the SQL in chunks and passes the
+	# prepared records to the parent via a shared-memory ring buffer
+	# ('buffer' with 'head'/'tail' positions). 'process_target' is the total
+	# number of records wanted (negative means exit), 'wait_event' and
+	# 'fetched_event' synchronize with the parent, and 'prep' converts a
+	# query row into the object that is pickled into the buffer.
+	def __init__(self, dbref, sql, buffer, head, tail, fetch_count, fetching_done, process_target, wait_event, fetched_event, prep):
+		# Need a unique connection name
+		conn_name = "SQLFetcher" + str(os.getpid())
+		self.db, dbname = dbref.Open(conn_name)
+		self.sql = sql
+		self.buffer = buffer
+		self.head = head
+		self.tail = tail
+		self.fetch_count = fetch_count
+		self.fetching_done = fetching_done
+		self.process_target = process_target
+		self.wait_event = wait_event
+		self.fetched_event = fetched_event
+		self.prep = prep
+		self.query = QSqlQuery(self.db)
+		# A query without the $$last_id$$ placeholder cannot be continued
+		# from where it left off, so allow it to execute only once (the
+		# limit counts down in Select() and stops at 1)
+		self.query_limit = 0 if "$$last_id$$" in sql else 2
+		self.last_id = -1
+		self.fetched = 0
+		self.more = True
+		self.local_head = self.head.value
+		self.local_tail = self.tail.value
+
+	# Execute the SELECT, continuing after the last record id seen
+	def Select(self):
+		if self.query_limit:
+			if self.query_limit == 1:
+				return
+			self.query_limit -= 1
+		stmt = self.sql.replace("$$last_id$$", str(self.last_id))
+		QueryExec(self.query, stmt)
+
+	# Return the next prepared record, or None when there are no more.
+	# Column 0 of every query is assumed to be the record id.
+	def Next(self):
+		if not self.query.next():
+			self.Select()
+			if not self.query.next():
+				return None
+		self.last_id = self.query.value(0)
+		return self.prep(self.query)
+
+	# Sleep until the parent raises the target above what has already been
+	# fetched, or asks to exit with a negative target
+	def WaitForTarget(self):
+		while True:
+			self.wait_event.clear()
+			target = self.process_target.value
+			if target > self.fetched or target < 0:
+				break
+			self.wait_event.wait()
+		return target
+
+	# Check for 'sz' bytes of contiguous free space in the ring buffer,
+	# wrapping the head to the start of the buffer if necessary
+	def HasSpace(self, sz):
+		if self.local_tail <= self.local_head:
+			space = len(self.buffer) - self.local_head
+			if space > sz:
+				return True
+			if space >= glb_nsz:
+				# Use 0 (or space < glb_nsz) to mean there is no more at the top of the buffer
+				nd = cPickle.dumps(0, cPickle.HIGHEST_PROTOCOL)
+				self.buffer[self.local_head : self.local_head + len(nd)] = nd
+			self.local_head = 0
+		if self.local_tail - self.local_head > sz:
+			return True
+		return False
+
+	# Sleep until the parent has consumed enough records to free 'sz' bytes
+	def WaitForSpace(self, sz):
+		if self.HasSpace(sz):
+			return
+		while True:
+			self.wait_event.clear()
+			self.local_tail = self.tail.value
+			if self.HasSpace(sz):
+				return
+			self.wait_event.wait()
+
+	# Append one record to the ring buffer as a pickled size (in a fixed
+	# glb_nsz slot) followed by the pickled object
+	def AddToBuffer(self, obj):
+		d = cPickle.dumps(obj, cPickle.HIGHEST_PROTOCOL)
+		n = len(d)
+		nd = cPickle.dumps(n, cPickle.HIGHEST_PROTOCOL)
+		sz = n + glb_nsz
+		self.WaitForSpace(sz)
+		pos = self.local_head
+		self.buffer[pos : pos + len(nd)] = nd
+		self.buffer[pos + glb_nsz : pos + sz] = d
+		self.local_head += sz
+
+	# Fetch up to 'batch_size' records into the buffer, then publish the
+	# new head position and count and wake the parent
+	def FetchBatch(self, batch_size):
+		fetched = 0
+		while batch_size > fetched:
+			obj = self.Next()
+			if obj is None:
+				self.more = False
+				break
+			self.AddToBuffer(obj)
+			fetched += 1
+		if fetched:
+			self.fetched += fetched
+			with self.fetch_count.get_lock():
+				self.fetch_count.value += fetched
+			self.head.value = self.local_head
+			self.fetched_event.set()
+
+	# Main loop: fetch in chunks towards the requested target until there
+	# are no more records or the parent asks to exit
+	def Run(self):
+		while self.more:
+			target = self.WaitForTarget()
+			if target < 0:
+				break
+			batch_size = min(glb_chunk_sz, target - self.fetched)
+			self.FetchBatch(batch_size)
+		self.fetching_done.value = True
+		self.fetched_event.set()
+
+def SQLFetcherFn(*x):
+       process = SQLFetcherProcess(*x)
+       process.Run()
+
+# SQL data fetcher
+
+class SQLFetcher(QObject):
+
+	# Emitted on the main thread with the number of records just processed
+	done = Signal(object)
+
+	# Parent-process side of the fetcher: starts a background process that
+	# runs the SQL, plus a thread that waits for fetched records and then
+	# unpickles them from the shared ring buffer, feeding each to
+	# 'process_data' on the main thread
+	def __init__(self, glb, sql, prep, process_data, parent=None):
+		super(SQLFetcher, self).__init__(parent)
+		self.process_data = process_data
+		self.more = True
+		self.target = 0
+		self.last_target = 0
+		self.fetched = 0
+		self.buffer_size = 16 * 1024 * 1024
+		self.buffer = Array(c_char, self.buffer_size, lock=False)
+		self.head = Value(c_longlong)
+		self.tail = Value(c_longlong)
+		self.local_tail = 0
+		self.fetch_count = Value(c_longlong)
+		self.fetching_done = Value(c_bool)
+		self.last_count = 0
+		self.process_target = Value(c_longlong)
+		self.wait_event = Event()
+		self.fetched_event = Event()
+		glb.AddInstanceToShutdownOnExit(self)
+		self.process = Process(target=SQLFetcherFn, args=(glb.dbref, sql, self.buffer, self.head, self.tail, self.fetch_count, self.fetching_done, self.process_target, self.wait_event, self.fetched_event, prep))
+		self.process.start()
+		self.thread = Thread(self.Thread)
+		self.thread.done.connect(self.ProcessData, Qt.QueuedConnection)
+		self.thread.start()
+
+	def Shutdown(self):
+		# Tell the thread and process to exit
+		self.process_target.value = -1
+		self.wait_event.set()
+		self.more = False
+		self.fetching_done.value = True
+		self.fetched_event.set()
+
+	# Thread body: wait until the shared fetch count advances, then return
+	# (done, count) where 'count' is the number of new records and 'done'
+	# is True once fetching has finished
+	def Thread(self):
+		if not self.more:
+			return True, 0
+		while True:
+			self.fetched_event.clear()
+			fetch_count = self.fetch_count.value
+			if fetch_count != self.last_count:
+				break
+			if self.fetching_done.value:
+				self.more = False
+				return True, 0
+			self.fetched_event.wait()
+		count = fetch_count - self.last_count
+		self.last_count = fetch_count
+		self.fetched += count
+		return False, count
+
+	# Ask for 'nr' more records (fetched asynchronously). Returns the
+	# number fetched so far, or -1 when there are no more.
+	def Fetch(self, nr):
+		if not self.more:
+			# -1 indicates there are no more
+			return -1
+		result = self.fetched
+		extra = result + nr - self.target
+		if extra > 0:
+			self.target += extra
+			# process_target < 0 indicates shutting down
+			if self.process_target.value >= 0:
+				self.process_target.value = self.target
+			self.wait_event.set()
+		return result
+
+	# Unpickle one record from the ring buffer, handling the zero-length
+	# wrap marker written at the top of the buffer by the fetcher process
+	def RemoveFromBuffer(self):
+		pos = self.local_tail
+		if len(self.buffer) - pos < glb_nsz:
+			pos = 0
+		n = cPickle.loads(self.buffer[pos : pos + glb_nsz])
+		if n == 0:
+			pos = 0
+			n = cPickle.loads(self.buffer[0 : glb_nsz])
+		pos += glb_nsz
+		obj = cPickle.loads(self.buffer[pos : pos + n])
+		self.local_tail = pos + n
+		return obj
+
+	# Runs on the main thread via the thread's done signal: consume 'count'
+	# records, publish the new tail position, wake the fetcher process and
+	# emit done
+	def ProcessData(self, count):
+		for i in xrange(count):
+			obj = self.RemoveFromBuffer()
+			self.process_data(obj)
+		self.tail.value = self.local_tail
+		self.wait_event.set()
+		self.done.emit(count)
+
+# Fetch more records bar
+
+class FetchMoreRecordsBar():
+
+       def __init__(self, model, parent):
+               self.model = model
+
+               self.label = QLabel("Number of records (x " + "{:,}".format(glb_chunk_sz) + ") to fetch:")
+               self.label.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+               self.fetch_count = QSpinBox()
+               self.fetch_count.setRange(1, 1000000)
+               self.fetch_count.setValue(10)
+               self.fetch_count.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+               self.fetch = QPushButton("Go!")
+               self.fetch.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+               self.fetch.released.connect(self.FetchMoreRecords)
+
+               self.progress = QProgressBar()
+               self.progress.setRange(0, 100)
+               self.progress.hide()
+
+               self.done_label = QLabel("All records fetched")
+               self.done_label.hide()
+
+               self.spacer = QLabel("")
+
+               self.close_button = QToolButton()
+               self.close_button.setIcon(parent.style().standardIcon(QStyle.SP_DockWidgetCloseButton))
+               self.close_button.released.connect(self.Deactivate)
+
+               self.hbox = QHBoxLayout()
+               self.hbox.setContentsMargins(0, 0, 0, 0)
+
+               self.hbox.addWidget(self.label)
+               self.hbox.addWidget(self.fetch_count)
+               self.hbox.addWidget(self.fetch)
+               self.hbox.addWidget(self.spacer)
+               self.hbox.addWidget(self.progress)
+               self.hbox.addWidget(self.done_label)
+               self.hbox.addWidget(self.close_button)
+
+               self.bar = QWidget()
+               self.bar.setLayout(self.hbox);
+               self.bar.show()
+
+               self.in_progress = False
+               self.model.progress.connect(self.Progress)
+
+               self.done = False
+
+               if not model.HasMoreRecords():
+                       self.Done()
+
+       def Widget(self):
+               return self.bar
+
+       def Activate(self):
+               self.bar.show()
+               self.fetch.setFocus()
+
+       def Deactivate(self):
+               self.bar.hide()
+
+       def Enable(self, enable):
+               self.fetch.setEnabled(enable)
+               self.fetch_count.setEnabled(enable)
+
+       def Busy(self):
+               self.Enable(False)
+               self.fetch.hide()
+               self.spacer.hide()
+               self.progress.show()
+
+       def Idle(self):
+               self.in_progress = False
+               self.Enable(True)
+               self.progress.hide()
+               self.fetch.show()
+               self.spacer.show()
+
+       def Target(self):
+               return self.fetch_count.value() * glb_chunk_sz
+
+       def Done(self):
+               self.done = True
+               self.Idle()
+               self.label.hide()
+               self.fetch_count.hide()
+               self.fetch.hide()
+               self.spacer.hide()
+               self.done_label.show()
+
+       def Progress(self, count):
+               if self.in_progress:
+                       if count:
+                               percent = ((count - self.start) * 100) / self.Target()
+                               if percent >= 100:
+                                       self.Idle()
+                               else:
+                                       self.progress.setValue(percent)
+               if not count:
+                       # Count value of zero means no more records
+                       self.Done()
+
+       def FetchMoreRecords(self):
+               if self.done:
+                       return
+               self.progress.setValue(0)
+               self.Busy()
+               self.in_progress = True
+               self.start = self.model.FetchMoreRecords(self.Target())
+
+# Branch data model level two item
+
+class BranchLevelTwoItem():
+
+       def __init__(self, row, text, parent_item):
+               self.row = row
+               self.parent_item = parent_item
+               self.data = [""] * 8
+               self.data[7] = text
+               self.level = 2
+
+       def getParentItem(self):
+               return self.parent_item
+
+       def getRow(self):
+               return self.row
+
+       def childCount(self):
+               return 0
+
+       def hasChildren(self):
+               return False
+
+       def getData(self, column):
+               return self.data[column]
+
+# Branch data model level one item
+
+class BranchLevelOneItem():
+
+	# One sample row in the branch tree. Child items, created lazily by
+	# Select(), are lines of disassembly between this sample and the next
+	# sample on the same CPU. 'data[0]' is the sample's database id.
+	def __init__(self, glb, row, data, parent_item):
+		self.glb = glb
+		self.row = row
+		self.parent_item = parent_item
+		self.child_count = 0
+		self.child_items = []
+		self.data = data[1:]
+		self.dbid = data[0]
+		self.level = 1
+		self.query_done = False
+
+	def getChildItem(self, row):
+		return self.child_items[row]
+
+	def getParentItem(self):
+		return self.parent_item
+
+	def getRow(self):
+		return self.row
+
+	# Lazily create the disassembly child items for this sample, if a
+	# disassembler is available: read the bytes from this sample's branch
+	# target up to and including the next sample's address on the same CPU
+	# from the DSO file, and disassemble them one instruction at a time
+	def Select(self):
+		self.query_done = True
+
+		if not self.glb.have_disassembler:
+			return
+
+		query = QSqlQuery(self.glb.db)
+
+		QueryExec(query, "SELECT cpu, to_dso_id, to_symbol_id, to_sym_offset, short_name, long_name, build_id, sym_start, to_ip"
+				  " FROM samples"
+				  " INNER JOIN dsos ON samples.to_dso_id = dsos.id"
+				  " INNER JOIN symbols ON samples.to_symbol_id = symbols.id"
+				  " WHERE samples.id = " + str(self.dbid))
+		if not query.next():
+			return
+		cpu = query.value(0)
+		dso = query.value(1)
+		sym = query.value(2)
+		# Ids of zero mean unknown dso or symbol
+		if dso == 0 or sym == 0:
+			return
+		off = query.value(3)
+		short_name = query.value(4)
+		long_name = query.value(5)
+		build_id = query.value(6)
+		sym_start = query.value(7)
+		ip = query.value(8)
+
+		# Find the next sample on the same CPU, which bounds the range to
+		# disassemble
+		QueryExec(query, "SELECT samples.dso_id, symbol_id, sym_offset, sym_start"
+				  " FROM samples"
+				  " INNER JOIN symbols ON samples.symbol_id = symbols.id"
+				  " WHERE samples.id > " + str(self.dbid) + " AND cpu = " + str(cpu) +
+				  " ORDER BY samples.id"
+				  " LIMIT 1")
+		if not query.next():
+			return
+		if query.value(0) != dso:
+			# Cannot disassemble from one dso to another
+			return
+		bsym = query.value(1)
+		boff = query.value(2)
+		bsym_start = query.value(3)
+		if bsym == 0:
+			return
+		# Total bytes from this sample's address up to and including the
+		# next sample's address; limit to a sane maximum
+		tot = bsym_start + boff + 1 - sym_start - off
+		if tot <= 0 or tot > 16384:
+			return
+
+		inst = self.glb.disassembler.Instruction()
+		f = self.glb.FileFromNamesAndBuildId(short_name, long_name, build_id)
+		if not f:
+			return
+		mode = 0 if Is64Bit(f) else 1
+		self.glb.disassembler.SetMode(inst, mode)
+
+		# NOTE(review): the file offset is taken as sym_start + off, i.e.
+		# file offsets are assumed to correspond to load addresses - confirm
+		# for the DSOs involved
+		buf_sz = tot + 16
+		buf = create_string_buffer(tot + 16)
+		f.seek(sym_start + off)
+		buf.value = f.read(buf_sz)
+		buf_ptr = addressof(buf)
+		i = 0
+		while tot > 0:
+			cnt, text = self.glb.disassembler.DisassembleOne(inst, buf_ptr, buf_sz, ip)
+			if cnt:
+				# Format: "<ip>  <bytes padded to 16> <disassembly>"
+				byte_str = tohex(ip).rjust(16)
+				for k in xrange(cnt):
+					byte_str += " %02x" % ord(buf[i])
+					i += 1
+				while k < 15:
+					byte_str += "   "
+					k += 1
+				self.child_items.append(BranchLevelTwoItem(0, byte_str + " " + text, self))
+				self.child_count += 1
+			else:
+				return
+			buf_ptr += cnt
+			tot -= cnt
+			buf_sz -= cnt
+			ip += cnt
+
+	# Returns -1 the first time there turn out to be no children -
+	# presumably a signal to the tree model; TODO confirm against the
+	# TreeModel implementation elsewhere in this file
+	def childCount(self):
+		if not self.query_done:
+			self.Select()
+			if not self.child_count:
+				return -1
+		return self.child_count
+
+	# Before the query has run, assume there may be children
+	def hasChildren(self):
+		if not self.query_done:
+			return True
+		return self.child_count > 0
+
+	def getData(self, column):
+		return self.data[column]
+
+# Branch data model root item
+
+class BranchRootItem():
+
+       def __init__(self):
+               self.child_count = 0
+               self.child_items = []
+               self.level = 0
+
+       def getChildItem(self, row):
+               return self.child_items[row]
+
+       def getParentItem(self):
+               return None
+
+       def getRow(self):
+               return 0
+
+       def childCount(self):
+               return self.child_count
+
+       def hasChildren(self):
+               return self.child_count > 0
+
+       def getData(self, column):
+               return ""
+
+# Branch data preparation
+
+def BranchDataPrep(query):
+       data = []
+       for i in xrange(0, 8):
+               data.append(query.value(i))
+       data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) +
+                       " (" + dsoname(query.value(11)) + ")" + " -> " +
+                       tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) +
+                       " (" + dsoname(query.value(15)) + ")")
+       return data
+
+# Branch data model
+
+class BranchModel(TreeModel):
+
+	# Emitted with the total row count after each fetch, or 0 when there
+	# are no more records
+	progress = Signal(object)
+
+	# Tree model of branch events for one event id, populated incrementally
+	# by a background SQLFetcher; 'where_clause' further restricts the
+	# samples selected
+	def __init__(self, glb, event_id, where_clause, parent=None):
+		super(BranchModel, self).__init__(BranchRootItem(), parent)
+		self.glb = glb
+		self.event_id = event_id
+		self.more = True
+		self.populated = 0
+		sql = ("SELECT samples.id, time, cpu, comm, pid, tid, branch_types.name,"
+			" CASE WHEN in_tx = '0' THEN 'No' ELSE 'Yes' END,"
+			" ip, symbols.name, sym_offset, dsos.short_name,"
+			" to_ip, to_symbols.name, to_sym_offset, to_dsos.short_name"
+			" FROM samples"
+			" INNER JOIN comms ON comm_id = comms.id"
+			" INNER JOIN threads ON thread_id = threads.id"
+			" INNER JOIN branch_types ON branch_type = branch_types.id"
+			" INNER JOIN symbols ON symbol_id = symbols.id"
+			" INNER JOIN symbols to_symbols ON to_symbol_id = to_symbols.id"
+			" INNER JOIN dsos ON samples.dso_id = dsos.id"
+			" INNER JOIN dsos AS to_dsos ON samples.to_dso_id = to_dsos.id"
+			" WHERE samples.id > $$last_id$$" + where_clause +
+			" AND evsel_id = " + str(self.event_id) +
+			" ORDER BY samples.id"
+			" LIMIT " + str(glb_chunk_sz))
+		self.fetcher = SQLFetcher(glb, sql, BranchDataPrep, self.AddSample)
+		self.fetcher.done.connect(self.Update)
+		self.fetcher.Fetch(glb_chunk_sz)
+
+	def columnCount(self, parent=None):
+		return 8
+
+	def columnHeader(self, column):
+		return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Branch")[column]
+
+	# Use a fixed-width font for the disassembly-style "Branch" column
+	def columnFont(self, column):
+		if column != 7:
+			return None
+		return QFont("Monospace")
+
+	# Fetch more records if the display is nearing the end of those fetched
+	def DisplayData(self, item, index):
+		if item.level == 1:
+			self.FetchIfNeeded(item.row)
+		return item.getData(index.column())
+
+	# Callback from the fetcher for each new record
+	def AddSample(self, data):
+		child = BranchLevelOneItem(self.glb, self.populated, data, self.root)
+		self.root.child_items.append(child)
+		self.populated += 1
+
+	# Fetcher done signal handler: insert the newly fetched rows into the
+	# model and report progress
+	def Update(self, fetched):
+		if not fetched:
+			self.more = False
+			self.progress.emit(0)
+		child_count = self.root.child_count
+		count = self.populated - child_count
+		if count > 0:
+			parent = QModelIndex()
+			self.beginInsertRows(parent, child_count, child_count + count - 1)
+			self.insertRows(child_count, count, parent)
+			self.root.child_count += count
+			self.endInsertRows()
+			self.progress.emit(self.root.child_count)
+
+	# Ask for 'count' more records; returns the current row count
+	def FetchMoreRecords(self, count):
+		current = self.root.child_count
+		if self.more:
+			self.fetcher.Fetch(count)
+		else:
+			self.progress.emit(0)
+		return current
+
+	def HasMoreRecords(self):
+		return self.more
+
+# Branch window
+
+class BranchWindow(QMdiSubWindow):
+
+       def __init__(self, glb, event_id, name, where_clause, parent=None):
+               super(BranchWindow, self).__init__(parent)
+
+               model_name = "Branch Events " + str(event_id)
+               if len(where_clause):
+                       model_name = where_clause + " " + model_name
+
+               self.model = LookupCreateModel(model_name, lambda: BranchModel(glb, event_id, where_clause))
+
+               self.view = QTreeView()
+               self.view.setUniformRowHeights(True)
+               self.view.setModel(self.model)
+
+               self.ResizeColumnsToContents()
+
+               self.find_bar = FindBar(self, self, True)
+
+               self.finder = ChildDataItemFinder(self.model.root)
+
+               self.fetch_bar = FetchMoreRecordsBar(self.model, self)
+
+               self.vbox = VBox(self.view, self.find_bar.Widget(), self.fetch_bar.Widget())
+
+               self.setWidget(self.vbox.Widget())
+
+               AddSubWindow(glb.mainwindow.mdi_area, self, name + " Branch Events")
+
+       def ResizeColumnToContents(self, column, n):
+               # Using the view's resizeColumnToContents() here is extrememly slow
+               # so implement a crude alternative
+               mm = "MM" if column else "MMMM"
+               font = self.view.font()
+               metrics = QFontMetrics(font)
+               max = 0
+               for row in xrange(n):
+                       val = self.model.root.child_items[row].data[column]
+                       len = metrics.width(str(val) + mm)
+                       max = len if len > max else max
+               val = self.model.columnHeader(column)
+               len = metrics.width(str(val) + mm)
+               max = len if len > max else max
+               self.view.setColumnWidth(column, max)
+
+       def ResizeColumnsToContents(self):
+               n = min(self.model.root.child_count, 100)
+               if n < 1:
+                       # No data yet, so connect a signal to notify when there is
+                       self.model.rowsInserted.connect(self.UpdateColumnWidths)
+                       return
+               columns = self.model.columnCount()
+               for i in xrange(columns):
+                       self.ResizeColumnToContents(i, n)
+
+       def UpdateColumnWidths(self, *x):
+               # This only needs to be done once, so disconnect the signal now
+               self.model.rowsInserted.disconnect(self.UpdateColumnWidths)
+               self.ResizeColumnsToContents()
+
+       def Find(self, value, direction, pattern, context):
+               self.view.setFocus()
+               self.find_bar.Busy()
+               self.finder.Find(value, direction, pattern, context, self.FindDone)
+
+       def FindDone(self, row):
+               self.find_bar.Idle()
+               if row >= 0:
+                       self.view.setCurrentIndex(self.model.index(row, 0, QModelIndex()))
+               else:
+                       self.find_bar.NotFound()
+
+# Event list
+
+def GetEventList(db):
+       events = []
+       query = QSqlQuery(db)
+       QueryExec(query, "SELECT name FROM selected_events WHERE id > 0 ORDER BY id")
+       while query.next():
+               events.append(query.value(0))
+       return events
+
+# SQL data preparation
+
+def SQLTableDataPrep(query, count):
+       data = []
+       for i in xrange(count):
+               data.append(query.value(i))
+       return data
+
+# SQL table data model item
+
+class SQLTableItem():
+
+	# One table row: 'row' is its position, 'data' its column values
+	def __init__(self, row, data):
+		self.row = row
+		self.data = data
+
+	def getData(self, column):
+		return self.data[column]
+
+# SQL table data model
+
+class SQLTableModel(TableModel):
+	# Table model populated incrementally from a SQL query by a
+	# background SQLFetcher.  Emits 'progress' with the current row
+	# count as batches arrive, and 0 when there is no more data.
+
+	progress = Signal(object)
+
+	def __init__(self, glb, sql, column_count, parent=None):
+		super(SQLTableModel, self).__init__(parent)
+		self.glb = glb
+		# True while the query may still yield more records
+		self.more = True
+		# Number of rows received from the fetcher so far
+		self.populated = 0
+		self.fetcher = SQLFetcher(glb, sql, lambda x, y=column_count: SQLTableDataPrep(x, y), self.AddSample)
+		self.fetcher.done.connect(self.Update)
+		self.fetcher.Fetch(glb_chunk_sz)
+
+	def DisplayData(self, item, index):
+		# Trigger fetching of more rows as the view nears the end of
+		# the loaded data
+		self.FetchIfNeeded(item.row)
+		return item.getData(index.column())
+
+	def AddSample(self, data):
+		# Fetcher callback: append one row of prepared column data
+		child = SQLTableItem(self.populated, data)
+		self.child_items.append(child)
+		self.populated += 1
+
+	def Update(self, fetched):
+		# Fetcher 'done' slot: publish newly arrived rows to the view
+		if not fetched:
+			self.more = False
+			self.progress.emit(0)
+		child_count = self.child_count
+		count = self.populated - child_count
+		if count > 0:
+			parent = QModelIndex()
+			self.beginInsertRows(parent, child_count, child_count + count - 1)
+			self.insertRows(child_count, count, parent)
+			self.child_count += count
+			self.endInsertRows()
+			self.progress.emit(self.child_count)
+
+	def FetchMoreRecords(self, count):
+		# Request up to 'count' more records; returns the row count
+		# before the request
+		current = self.child_count
+		if self.more:
+			self.fetcher.Fetch(count)
+		else:
+			self.progress.emit(0)
+		return current
+
+	def HasMoreRecords(self):
+		return self.more
+
+# SQL automatic table data model
+
+class SQLAutoTableModel(SQLTableModel):
+	# SQLTableModel that discovers a table's columns automatically and
+	# pages through it by id (or comm_id for comm_threads_view).
+
+	def __init__(self, glb, table_name, parent=None):
+		# "$$last_id$$" is presumably substituted with the last fetched
+		# id by the fetcher — confirm against SQLFetcher
+		sql = "SELECT * FROM " + table_name + " WHERE id > $$last_id$$ ORDER BY id LIMIT " + str(glb_chunk_sz)
+		if table_name == "comm_threads_view":
+			# For now, comm_threads_view has no id column
+			sql = "SELECT * FROM " + table_name + " WHERE comm_id > $$last_id$$ ORDER BY comm_id LIMIT " + str(glb_chunk_sz)
+		self.column_headers = []
+		query = QSqlQuery(glb.db)
+		if glb.dbref.is_sqlite3:
+			# SQLite: column names come from PRAGMA table_info (value 1)
+			QueryExec(query, "PRAGMA table_info(" + table_name + ")")
+			while query.next():
+				self.column_headers.append(query.value(1))
+			if table_name == "sqlite_master":
+				sql = "SELECT * FROM " + table_name
+		else:
+			# PostgreSQL: look the columns up in information_schema
+			if table_name[:19] == "information_schema.":
+				sql = "SELECT * FROM " + table_name
+				select_table_name = table_name[19:]
+				schema = "information_schema"
+			else:
+				select_table_name = table_name
+				schema = "public"
+			QueryExec(query, "SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' and table_name = '" + select_table_name + "'")
+			while query.next():
+				self.column_headers.append(query.value(0))
+		super(SQLAutoTableModel, self).__init__(glb, sql, len(self.column_headers), parent)
+
+	def columnCount(self, parent=None):
+		return len(self.column_headers)
+
+	def columnHeader(self, column):
+		return self.column_headers[column]
+
+# Base class for custom ResizeColumnsToContents
+
+class ResizeColumnsToContentsBase(QObject):
+
+       def __init__(self, parent=None):
+               super(ResizeColumnsToContentsBase, self).__init__(parent)
+
+       def ResizeColumnToContents(self, column, n):
+               # Using the view's resizeColumnToContents() here is extrememly slow
+               # so implement a crude alternative
+               font = self.view.font()
+               metrics = QFontMetrics(font)
+               max = 0
+               for row in xrange(n):
+                       val = self.data_model.child_items[row].data[column]
+                       len = metrics.width(str(val) + "MM")
+                       max = len if len > max else max
+               val = self.data_model.columnHeader(column)
+               len = metrics.width(str(val) + "MM")
+               max = len if len > max else max
+               self.view.setColumnWidth(column, max)
+
+       def ResizeColumnsToContents(self):
+               n = min(self.data_model.child_count, 100)
+               if n < 1:
+                       # No data yet, so connect a signal to notify when there is
+                       self.data_model.rowsInserted.connect(self.UpdateColumnWidths)
+                       return
+               columns = self.data_model.columnCount()
+               for i in xrange(columns):
+                       self.ResizeColumnToContents(i, n)
+
+       def UpdateColumnWidths(self, *x):
+               # This only needs to be done once, so disconnect the signal now
+               self.data_model.rowsInserted.disconnect(self.UpdateColumnWidths)
+               self.ResizeColumnsToContents()
+
+# Table window
+
+class TableWindow(QMdiSubWindow, ResizeColumnsToContentsBase):
+	# MDI sub-window showing an arbitrary database table in a sortable,
+	# read-only QTableView with find and fetch-more bars.
+
+	def __init__(self, glb, table_name, parent=None):
+		super(TableWindow, self).__init__(parent)
+
+		# Reuse an existing model for this table if one was already made
+		self.data_model = LookupCreateModel(table_name + " Table", lambda: SQLAutoTableModel(glb, table_name))
+
+		# Proxy so clicking column headers sorts the view
+		self.model = QSortFilterProxyModel()
+		self.model.setSourceModel(self.data_model)
+
+		self.view = QTableView()
+		self.view.setModel(self.model)
+		self.view.setEditTriggers(QAbstractItemView.NoEditTriggers)
+		self.view.verticalHeader().setVisible(False)
+		# Start unsorted (-1) i.e. in database order
+		self.view.sortByColumn(-1, Qt.AscendingOrder)
+		self.view.setSortingEnabled(True)
+
+		self.ResizeColumnsToContents()
+
+		self.find_bar = FindBar(self, self, True)
+
+		self.finder = ChildDataItemFinder(self.data_model)
+
+		self.fetch_bar = FetchMoreRecordsBar(self.data_model, self)
+
+		self.vbox = VBox(self.view, self.find_bar.Widget(), self.fetch_bar.Widget())
+
+		self.setWidget(self.vbox.Widget())
+
+		AddSubWindow(glb.mainwindow.mdi_area, self, table_name + " Table")
+
+	def Find(self, value, direction, pattern, context):
+		# Start an asynchronous find; FindDone() receives the result row
+		self.view.setFocus()
+		self.find_bar.Busy()
+		self.finder.Find(value, direction, pattern, context, self.FindDone)
+
+	def FindDone(self, row):
+		# Select the found row, or report not found (row < 0)
+		self.find_bar.Idle()
+		if row >= 0:
+			self.view.setCurrentIndex(self.model.index(row, 0, QModelIndex()))
+		else:
+			self.find_bar.NotFound()
+
+# Table list
+
+def GetTableList(glb):
+	# Return the names of all tables and views in the database, sorted,
+	# plus the applicable metadata tables for browsing the schema itself.
+	tables = []
+	query = QSqlQuery(glb.db)
+	if glb.dbref.is_sqlite3:
+		QueryExec(query, "SELECT name FROM sqlite_master WHERE type IN ( 'table' , 'view' ) ORDER BY name")
+	else:
+		QueryExec(query, "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type IN ( 'BASE TABLE' , 'VIEW' ) ORDER BY table_name")
+	while query.next():
+		tables.append(query.value(0))
+	if glb.dbref.is_sqlite3:
+		tables.append("sqlite_master")
+	else:
+		tables.append("information_schema.tables")
+		tables.append("information_schema.views")
+		tables.append("information_schema.columns")
+	return tables
+
+# Action Definition
+
+def CreateAction(label, tip, callback, parent=None, shortcut=None):
+       action = QAction(label, parent)
+       if shortcut != None:
+               action.setShortcuts(shortcut)
+       action.setStatusTip(tip)
+       action.triggered.connect(callback)
+       return action
+
+# Typical application actions
+
+def CreateExitAction(app, parent=None):
+	# Standard Quit action bound to QApplication.closeAllWindows
+	return CreateAction("&Quit", "Exit the application", app.closeAllWindows, parent, QKeySequence.Quit)
+
+# Typical MDI actions
+
+def CreateCloseActiveWindowAction(mdi_area):
+	# Close the currently active MDI sub-window
+	return CreateAction("Cl&ose", "Close the active window", mdi_area.closeActiveSubWindow, mdi_area)
+
+def CreateCloseAllWindowsAction(mdi_area):
+	# Close every MDI sub-window
+	return CreateAction("Close &All", "Close all the windows", mdi_area.closeAllSubWindows, mdi_area)
+
+def CreateTileWindowsAction(mdi_area):
+	# Tile the MDI sub-windows
+	return CreateAction("&Tile", "Tile the windows", mdi_area.tileSubWindows, mdi_area)
+
+def CreateCascadeWindowsAction(mdi_area):
+	# Cascade the MDI sub-windows
+	return CreateAction("&Cascade", "Cascade the windows", mdi_area.cascadeSubWindows, mdi_area)
+
+def CreateNextWindowAction(mdi_area):
+	# Focus the next MDI sub-window (standard NextChild shortcut)
+	return CreateAction("Ne&xt", "Move the focus to the next window", mdi_area.activateNextSubWindow, mdi_area, QKeySequence.NextChild)
+
+def CreatePreviousWindowAction(mdi_area):
+	# Focus the previous MDI sub-window (standard PreviousChild shortcut)
+	return CreateAction("Pre&vious", "Move the focus to the previous window", mdi_area.activatePreviousSubWindow, mdi_area, QKeySequence.PreviousChild)
+
+# Typical MDI window menu
+
+class WindowMenu():
+
+       def __init__(self, mdi_area, menu):
+               self.mdi_area = mdi_area
+               self.window_menu = menu.addMenu("&Windows")
+               self.close_active_window = CreateCloseActiveWindowAction(mdi_area)
+               self.close_all_windows = CreateCloseAllWindowsAction(mdi_area)
+               self.tile_windows = CreateTileWindowsAction(mdi_area)
+               self.cascade_windows = CreateCascadeWindowsAction(mdi_area)
+               self.next_window = CreateNextWindowAction(mdi_area)
+               self.previous_window = CreatePreviousWindowAction(mdi_area)
+               self.window_menu.aboutToShow.connect(self.Update)
+
+       def Update(self):
+               self.window_menu.clear()
+               sub_window_count = len(self.mdi_area.subWindowList())
+               have_sub_windows = sub_window_count != 0
+               self.close_active_window.setEnabled(have_sub_windows)
+               self.close_all_windows.setEnabled(have_sub_windows)
+               self.tile_windows.setEnabled(have_sub_windows)
+               self.cascade_windows.setEnabled(have_sub_windows)
+               self.next_window.setEnabled(have_sub_windows)
+               self.previous_window.setEnabled(have_sub_windows)
+               self.window_menu.addAction(self.close_active_window)
+               self.window_menu.addAction(self.close_all_windows)
+               self.window_menu.addSeparator()
+               self.window_menu.addAction(self.tile_windows)
+               self.window_menu.addAction(self.cascade_windows)
+               self.window_menu.addSeparator()
+               self.window_menu.addAction(self.next_window)
+               self.window_menu.addAction(self.previous_window)
+               if sub_window_count == 0:
+                       return
+               self.window_menu.addSeparator()
+               nr = 1
+               for sub_window in self.mdi_area.subWindowList():
+                       label = str(nr) + " " + sub_window.name
+                       if nr < 10:
+                               label = "&" + label
+                       action = self.window_menu.addAction(label)
+                       action.setCheckable(True)
+                       action.setChecked(sub_window == self.mdi_area.activeSubWindow())
+                       action.triggered.connect(lambda x=nr: self.setActiveSubWindow(x))
+                       self.window_menu.addAction(action)
+                       nr += 1
+
+       def setActiveSubWindow(self, nr):
+               self.mdi_area.setActiveSubWindow(self.mdi_area.subWindowList()[nr - 1])
+
+# Font resize
+
+def ResizeFont(widget, diff):
+	# Change the widget's font point size by 'diff' (may be negative)
+	font = widget.font()
+	sz = font.pointSize()
+	font.setPointSize(sz + diff)
+	widget.setFont(font)
+
+def ShrinkFont(widget):
+	# Decrease the widget's font size by one point
+	ResizeFont(widget, -1)
+
+def EnlargeFont(widget):
+	# Increase the widget's font size by one point
+	ResizeFont(widget, 1)
+
+# Unique name for sub-windows
+
+def NumberedWindowName(name, nr):
+	# Append " <nr>" for the second and subsequent windows with the same name
+	if nr > 1:
+		name += " <" + str(nr) + ">"
+	return name
+
+def UniqueSubWindowName(mdi_area, name):
+	# Find the lowest-numbered variant of 'name' not already used by an
+	# existing sub-window
+	nr = 1
+	while True:
+		unique_name = NumberedWindowName(name, nr)
+		ok = True
+		for sub_window in mdi_area.subWindowList():
+			if sub_window.name == unique_name:
+				ok = False
+				break
+		if ok:
+			return unique_name
+		nr += 1
+
+# Add a sub-window
+
+def AddSubWindow(mdi_area, sub_window, name):
+	# Give the sub-window a unique title/name and standard sizing/icon,
+	# then add it to the MDI area and show it
+	unique_name = UniqueSubWindowName(mdi_area, name)
+	sub_window.setMinimumSize(200, 100)
+	sub_window.resize(800, 600)
+	sub_window.setWindowTitle(unique_name)
+	# Delete the widget on close so models/finders it owns are released
+	sub_window.setAttribute(Qt.WA_DeleteOnClose)
+	sub_window.setWindowIcon(sub_window.style().standardIcon(QStyle.SP_FileIcon))
+	sub_window.name = unique_name
+	mdi_area.addSubWindow(sub_window)
+	sub_window.show()
+
+# Main window
+
+class MainWindow(QMainWindow):
+
+       def __init__(self, glb, parent=None):
+               super(MainWindow, self).__init__(parent)
+
+               self.glb = glb
+
+               self.setWindowTitle("Exported SQL Viewer: " + glb.dbname)
+               self.setWindowIcon(self.style().standardIcon(QStyle.SP_ComputerIcon))
+               self.setMinimumSize(200, 100)
+
+               self.mdi_area = QMdiArea()
+               self.mdi_area.setHorizontalScrollBarPolicy(Qt.ScrollBarAsNeeded)
+               self.mdi_area.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded)
+
+               self.setCentralWidget(self.mdi_area)
+
+               menu = self.menuBar()
+
+               file_menu = menu.addMenu("&File")
+               file_menu.addAction(CreateExitAction(glb.app, self))
+
+               edit_menu = menu.addMenu("&Edit")
+               edit_menu.addAction(CreateAction("&Find...", "Find items", self.Find, self, QKeySequence.Find))
+               edit_menu.addAction(CreateAction("Fetch &more records...", "Fetch more records", self.FetchMoreRecords, self, [QKeySequence(Qt.Key_F8)]))
+               edit_menu.addAction(CreateAction("&Shrink Font", "Make text smaller", self.ShrinkFont, self, [QKeySequence("Ctrl+-")]))
+               edit_menu.addAction(CreateAction("&Enlarge Font", "Make text bigger", self.EnlargeFont, self, [QKeySequence("Ctrl++")]))
+
+               reports_menu = menu.addMenu("&Reports")
+               reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new window containing a context-sensitive call graph", self.NewCallGraph, self))
+
+               self.EventMenu(GetEventList(glb.db), reports_menu)
+
+               self.TableMenu(GetTableList(glb), menu)
+
+               self.window_menu = WindowMenu(self.mdi_area, menu)
+
+       def Find(self):
+               win = self.mdi_area.activeSubWindow()
+               if win:
+                       try:
+                               win.find_bar.Activate()
+                       except:
+                               pass
+
+       def FetchMoreRecords(self):
+               win = self.mdi_area.activeSubWindow()
+               if win:
+                       try:
+                               win.fetch_bar.Activate()
+                       except:
+                               pass
+
+       def ShrinkFont(self):
+               win = self.mdi_area.activeSubWindow()
+               ShrinkFont(win.view)
+
+       def EnlargeFont(self):
+               win = self.mdi_area.activeSubWindow()
+               EnlargeFont(win.view)
+
+       def EventMenu(self, events, reports_menu):
+               branches_events = 0
+               for event in events:
+                       event = event.split(":")[0]
+                       if event == "branches":
+                               branches_events += 1
+               dbid = 0
+               for event in events:
+                       dbid += 1
+                       event = event.split(":")[0]
+                       if event == "branches":
+                               label = "All branches" if branches_events == 1 else "All branches " + "(id=" + dbid + ")"
+                               reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewBranchView(x), self))
+
+       def TableMenu(self, tables, menu):
+               table_menu = menu.addMenu("&Tables")
+               for table in tables:
+                       table_menu.addAction(CreateAction(table, "Create a new window containing a table view", lambda t=table: self.NewTableView(t), self))
+
+       def NewCallGraph(self):
+               CallGraphWindow(self.glb, self)
+
+       def NewBranchView(self, event_id):
+               BranchWindow(self.glb, event_id, "", "", self)
+
+       def NewTableView(self, table_name):
+               TableWindow(self.glb, table_name, self)
+
+# XED Disassembler
+
+class xed_state_t(Structure):
+	# ctypes mirror of libxed's xed_state_t: machine mode and address width
+
+	_fields_ = [
+		("mode", c_int),
+		("width", c_int)
+	]
+
+class XEDInstruction():
+	# Per-instruction decode state and output buffers for libxed
+
+	def __init__(self, libxed):
+		# Current xed_decoded_inst_t structure is 192 bytes. Use 512 to allow for future expansion
+		xedd_t = c_byte * 512
+		self.xedd = xedd_t()
+		self.xedp = addressof(self.xedd)
+		libxed.xed_decoded_inst_zero(self.xedp)
+		self.state = xed_state_t()
+		self.statep = addressof(self.state)
+		# Buffer for disassembled instruction text
+		self.buffer = create_string_buffer(256)
+		self.bufferp = addressof(self.buffer)
+
+class LibXED():
+	# ctypes binding to the Intel XED disassembler shared library.
+	# Raises (from CDLL) if libxed.so is not installed.
+
+	def __init__(self):
+		self.libxed = CDLL("libxed.so")
+
+		self.xed_tables_init = self.libxed.xed_tables_init
+		self.xed_tables_init.restype = None
+		self.xed_tables_init.argtypes = []
+
+		self.xed_decoded_inst_zero = self.libxed.xed_decoded_inst_zero
+		self.xed_decoded_inst_zero.restype = None
+		self.xed_decoded_inst_zero.argtypes = [ c_void_p ]
+
+		self.xed_operand_values_set_mode = self.libxed.xed_operand_values_set_mode
+		self.xed_operand_values_set_mode.restype = None
+		self.xed_operand_values_set_mode.argtypes = [ c_void_p, c_void_p ]
+
+		self.xed_decoded_inst_zero_keep_mode = self.libxed.xed_decoded_inst_zero_keep_mode
+		self.xed_decoded_inst_zero_keep_mode.restype = None
+		self.xed_decoded_inst_zero_keep_mode.argtypes = [ c_void_p ]
+
+		self.xed_decode = self.libxed.xed_decode
+		self.xed_decode.restype = c_int
+		self.xed_decode.argtypes = [ c_void_p, c_void_p, c_uint ]
+
+		self.xed_format_context = self.libxed.xed_format_context
+		self.xed_format_context.restype = c_uint
+		self.xed_format_context.argtypes = [ c_int, c_void_p, c_void_p, c_int, c_ulonglong, c_void_p, c_void_p ]
+
+		# One-time global initialization required before decoding
+		self.xed_tables_init()
+
+	def Instruction(self):
+		# Allocate a reusable decode state/buffer object
+		return XEDInstruction(self)
+
+	def SetMode(self, inst, mode):
+		# Truthy 'mode' selects 32-bit decoding, falsy selects 64-bit
+		if mode:
+			inst.state.mode = 4 # 32-bit
+			inst.state.width = 4 # 4 bytes
+		else:
+			inst.state.mode = 1 # 64-bit
+			inst.state.width = 8 # 8 bytes
+		self.xed_operand_values_set_mode(inst.xedp, inst.statep)
+
+	def DisassembleOne(self, inst, bytes_ptr, bytes_cnt, ip):
+		# Decode one instruction at address 'ip'; returns (length, text)
+		# or (0, "") on decode/format failure
+		self.xed_decoded_inst_zero_keep_mode(inst.xedp)
+		err = self.xed_decode(inst.xedp, bytes_ptr, bytes_cnt)
+		if err:
+			return 0, ""
+		# Use AT&T mode (2), alternative is Intel (3)
+		ok = self.xed_format_context(2, inst.xedp, inst.bufferp, sizeof(inst.buffer), ip, 0, 0)
+		if not ok:
+			return 0, ""
+		# Return instruction length and the disassembled instruction text
+		# For now, assume the length is in byte 166
+		return inst.xedd[166], inst.buffer.value
+
+def TryOpen(file_name):
+	# Open a file for binary reading, returning None on any failure
+	try:
+		return open(file_name, "rb")
+	except:
+		return None
+
+def Is64Bit(f):
+       result = sizeof(c_void_p)
+       # ELF support only
+       pos = f.tell()
+       f.seek(0)
+       header = f.read(7)
+       f.seek(pos)
+       magic = header[0:4]
+       eclass = ord(header[4])
+       encoding = ord(header[5])
+       version = ord(header[6])
+       if magic == chr(127) + "ELF" and eclass > 0 and eclass < 3 and encoding > 0 and encoding < 3 and version == 1:
+               result = True if eclass == 2 else False
+       return result
+
+# Global data
+
+class Glb():
+	# Global application state: database handles, build-id lookup paths,
+	# the optional disassembler, and background instances that need
+	# shutting down on exit.
+
+	def __init__(self, dbref, db, dbname):
+		self.dbref = dbref
+		self.db = db
+		self.dbname = dbname
+		self.home_dir = os.path.expanduser("~")
+		# Build-id cache: $PERF_BUILDID_DIR or ~/.debug
+		self.buildid_dir = os.getenv("PERF_BUILDID_DIR")
+		if self.buildid_dir:
+			self.buildid_dir += "/.build-id/"
+		else:
+			self.buildid_dir = self.home_dir + "/.debug/.build-id/"
+		self.app = None
+		self.mainwindow = None
+		# Weak references so registered objects can still be collected
+		self.instances_to_shutdown_on_exit = weakref.WeakSet()
+		try:
+			self.disassembler = LibXED()
+			self.have_disassembler = True
+		except:
+			# libxed.so unavailable; disassembly is simply disabled
+			self.have_disassembler = False
+
+	def FileFromBuildId(self, build_id):
+		# Look the binary up in the local build-id cache
+		file_name = self.buildid_dir + build_id[0:2] + "/" + build_id[2:] + "/elf"
+		return TryOpen(file_name)
+
+	def FileFromNamesAndBuildId(self, short_name, long_name, build_id):
+		# Open the binary for a dso, trying $PERF_KCORE then the dso's
+		# long name for the kernel, then the build-id cache.
+		# Assume current machine i.e. no support for virtualization
+		if short_name[0:7] == "[kernel" and os.path.basename(long_name) == "kcore":
+			file_name = os.getenv("PERF_KCORE")
+			f = TryOpen(file_name) if file_name else None
+			if f:
+				return f
+			# For now, no special handling if long_name is /proc/kcore
+			f = TryOpen(long_name)
+			if f:
+				return f
+		f = self.FileFromBuildId(build_id)
+		if f:
+			return f
+		return None
+
+	def AddInstanceToShutdownOnExit(self, instance):
+		self.instances_to_shutdown_on_exit.add(instance)
+
+	# Shutdown any background processes or threads
+	def ShutdownInstances(self):
+		for x in self.instances_to_shutdown_on_exit:
+			try:
+				x.Shutdown()
+			except:
+				pass
+
+# Database reference
+
+class DBRef():
+	# How to open the database: a SQLite file path, or a PostgreSQL
+	# connection string of space-separated "key=value" options.
+
+	def __init__(self, is_sqlite3, dbname):
+		self.is_sqlite3 = is_sqlite3
+		self.dbname = dbname
+
+	def Open(self, connection_name):
+		# Open a Qt SQL connection and return (db, dbname) where dbname
+		# is the resolved database name.  Raises Exception on failure.
+		dbname = self.dbname
+		if self.is_sqlite3:
+			db = QSqlDatabase.addDatabase("QSQLITE", connection_name)
+		else:
+			db = QSqlDatabase.addDatabase("QPSQL", connection_name)
+			# Parse "key=value" options; a bare word is taken as the
+			# database name
+			opts = dbname.split()
+			for opt in opts:
+				if "=" in opt:
+					opt = opt.split("=")
+					if opt[0] == "hostname":
+						db.setHostName(opt[1])
+					elif opt[0] == "port":
+						db.setPort(int(opt[1]))
+					elif opt[0] == "username":
+						db.setUserName(opt[1])
+					elif opt[0] == "password":
+						db.setPassword(opt[1])
+					elif opt[0] == "dbname":
+						dbname = opt[1]
+				else:
+					dbname = opt
+
+		db.setDatabaseName(dbname)
+		if not db.open():
+			raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
+		return db, dbname
+
+# Main
+
+def Main():
+	# Entry point: open the database named on the command line, detecting
+	# SQLite vs PostgreSQL by the file's magic header, and run the GUI.
+	if (len(sys.argv) < 2):
+		print >> sys.stderr, "Usage is: exported-sql-viewer.py <database name>"
+		raise Exception("Too few arguments")
+
+	dbname = sys.argv[1]
+
+	# A SQLite database is a local file starting with "SQLite format 3";
+	# anything else is treated as a PostgreSQL connection string
+	is_sqlite3 = False
+	try:
+		f = open(dbname)
+		if f.read(15) == "SQLite format 3":
+			is_sqlite3 = True
+		f.close()
+	except:
+		pass
+
+	dbref = DBRef(is_sqlite3, dbname)
+	db, dbname = dbref.Open("main")
+	glb = Glb(dbref, db, dbname)
+	app = QApplication(sys.argv)
+	glb.app = app
+	mainwindow = MainWindow(glb)
+	glb.mainwindow = mainwindow
+	mainwindow.show()
+	err = app.exec_()
+	# Stop background fetcher processes before closing the database
+	glb.ShutdownInstances()
+	db.close()
+	sys.exit(err)
+
+if __name__ == "__main__":
+	Main()
index c3b0afd67760aeff5dfe12c8a8cb36ce5b0505f3..3043130732427d145e23efd8d0ebe3a2dfaefdeb 100644 (file)
@@ -5,6 +5,7 @@ ifeq ($(SRCARCH),$(filter $(SRCARCH),x86))
 libperf-y += ioctl.o
 endif
 libperf-y += kcmp.o
+libperf-y += mount_flags.o
 libperf-y += pkey_alloc.o
 libperf-y += prctl.o
 libperf-y += sockaddr.o
index 2570152d3909781ef1a1db395527c77d7935f7a9..039c29039b2c4d752591e99d037397720497da5e 100644 (file)
@@ -24,6 +24,7 @@ struct strarray {
 }
 
 size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val);
+size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, unsigned long flags);
 
 struct trace;
 struct thread;
@@ -122,6 +123,12 @@ size_t syscall_arg__scnprintf_kcmp_type(char *bf, size_t size, struct syscall_ar
 size_t syscall_arg__scnprintf_kcmp_idx(char *bf, size_t size, struct syscall_arg *arg);
 #define SCA_KCMP_IDX syscall_arg__scnprintf_kcmp_idx
 
+unsigned long syscall_arg__mask_val_mount_flags(struct syscall_arg *arg, unsigned long flags);
+#define SCAMV_MOUNT_FLAGS syscall_arg__mask_val_mount_flags
+
+size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_MOUNT_FLAGS syscall_arg__scnprintf_mount_flags
+
 size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg);
 #define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights
 
index d64d049ab9915162e220f91c3f59997d8abddc15..010406500c30476b229306c0565fb074a4a337b6 100644 (file)
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/cone.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
index 9d3816815e60f48ff8ef8f6b77faf779a6d9bd1b..9aa94fd523a9c2ddddf14f6dbd3790b559d78ee6 100755 (executable)
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/drm/
 
index 5d6a477a64002ef5cef6ef6e169cd439e38b66d9..db5b9b4921137cfadf1e8f2a0caa3d9358641559 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #ifndef EFD_SEMAPHORE
 #define EFD_SEMAPHORE          1
 #endif
index 9e8900c13cb1380a38e016430e572191a534b831..e6de31674e246ea8c22574eddfe6cb82e794508d 100644 (file)
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/fcntl.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
index c4ff6ad30b0627ae1561d67404e2d208ebf6506a..cf02ae5f0ba66eca3f2adc4f8c36ef28e4bde6c2 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 
 #include "trace/beauty/beauty.h"
 #include <linux/kernel.h>
index 61850fbc85ff33d75c074e655eaa490dbe48e537..1136bde56406e6018c0c64cb801bf27d8376cdc3 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <linux/futex.h>
 
 #ifndef FUTEX_WAIT_BITSET
index 26f6b3253511e6240efb62bf958bad8c65276508..138b7d588a7083ff9161d4f52cdbdfecaec2a626 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <linux/futex.h>
 
 #ifndef FUTEX_BITSET_MATCH_ANY
index 1be3b4cf082708194ca7c4c8403219eedb924c3d..5d2a7fd8d407787daedadb02e5fdb62b3e6f2436 100644 (file)
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/ioctl.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
index f62040eb9d5c5cebb8685509935c52fec7f6562c..b276a274f2030b0dd66637a498fa2086f8c8926c 100644 (file)
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/kcmp.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
index a3c304caa336572baaa59d48b684b7413918429a..df8b17486d575c8564f33b3522a675ce1b4372d5 100755 (executable)
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
index c4699fd46bb64a3230232e452b366551a17c3609..4ce54f5bf7564522036f28475ec4de4a3e94fd26 100755 (executable)
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
index 431639eb4d29a9c35011f6d11d20d46ec92d9b09..4527d290cdfc6499510c18fdb9bf0e066decdbc1 100755 (executable)
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/asm-generic/
 
index 9f68077b241b9c59475cb5d8403e0002f06769a3..c534bd96ef5c9dc8a8d762de61cb73a3537ddf4c 100644 (file)
@@ -1,5 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <uapi/linux/mman.h>
+#include <linux/log2.h>
 
 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
                                               struct syscall_arg *arg)
@@ -30,50 +31,23 @@ static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
 
 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
 
+static size_t mmap__scnprintf_flags(unsigned long flags, char *bf, size_t size)
+{
+#include "trace/beauty/generated/mmap_flags_array.c"
+       static DEFINE_STRARRAY(mmap_flags);
+
+       return strarray__scnprintf_flags(&strarray__mmap_flags, bf, size, flags);
+}
+
 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
                                                struct syscall_arg *arg)
 {
-       int printed = 0, flags = arg->val;
+       unsigned long flags = arg->val;
 
        if (flags & MAP_ANONYMOUS)
                arg->mask |= (1 << 4) | (1 << 5); /* Mask 4th ('fd') and 5th ('offset') args, ignored */
 
-#define        P_MMAP_FLAG(n) \
-       if (flags & MAP_##n) { \
-               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-               flags &= ~MAP_##n; \
-       }
-
-       P_MMAP_FLAG(SHARED);
-       P_MMAP_FLAG(PRIVATE);
-#ifdef MAP_32BIT
-       P_MMAP_FLAG(32BIT);
-#endif
-       P_MMAP_FLAG(ANONYMOUS);
-       P_MMAP_FLAG(DENYWRITE);
-       P_MMAP_FLAG(EXECUTABLE);
-       P_MMAP_FLAG(FILE);
-       P_MMAP_FLAG(FIXED);
-#ifdef MAP_FIXED_NOREPLACE
-       P_MMAP_FLAG(FIXED_NOREPLACE);
-#endif
-       P_MMAP_FLAG(GROWSDOWN);
-       P_MMAP_FLAG(HUGETLB);
-       P_MMAP_FLAG(LOCKED);
-       P_MMAP_FLAG(NONBLOCK);
-       P_MMAP_FLAG(NORESERVE);
-       P_MMAP_FLAG(POPULATE);
-       P_MMAP_FLAG(STACK);
-       P_MMAP_FLAG(UNINITIALIZED);
-#ifdef MAP_SYNC
-       P_MMAP_FLAG(SYNC);
-#endif
-#undef P_MMAP_FLAG
-
-       if (flags)
-               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
-       return printed;
+       return mmap__scnprintf_flags(flags, bf, size);
 }
 
 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh
new file mode 100755 (executable)
index 0000000..22c3fdc
--- /dev/null
@@ -0,0 +1,32 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+if [ $# -ne 2 ] ; then
+       [ $# -eq 1 ] && hostarch=$1 || hostarch=`uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/`
+       header_dir=tools/include/uapi/asm-generic
+       arch_header_dir=tools/arch/${hostarch}/include/uapi/asm
+else
+       header_dir=$1
+       arch_header_dir=$2
+fi
+
+arch_mman=${arch_header_dir}/mman.h
+
+# those in egrep -vw are flags, we want just the bits
+
+printf "static const char *mmap_flags[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MAP_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
+egrep -q $regex ${arch_mman} && \
+(egrep $regex ${arch_mman} | \
+       sed -r "s/$regex/\2 \1/g"       | \
+       xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.*' ${arch_mman} &&
+(egrep $regex ${header_dir}/mman-common.h | \
+       egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
+       sed -r "s/$regex/\2 \1/g"       | \
+       xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.h>.*' ${arch_mman} &&
+(egrep $regex ${header_dir}/mman.h | \
+       sed -r "s/$regex/\2 \1/g"       | \
+       xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+printf "};\n"
index d929ad7dd97be97b922ecf9d92c35c4b072a5fbe..6879d36d30048e6d08df13027071c7711ca6aa82 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
diff --git a/tools/perf/trace/beauty/mount_flags.c b/tools/perf/trace/beauty/mount_flags.c
new file mode 100644 (file)
index 0000000..712935c
--- /dev/null
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * trace/beauty/mount_flags.c
+ *
+ *  Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <sys/mount.h>
+
+static size_t mount__scnprintf_flags(unsigned long flags, char *bf, size_t size)
+{
+#include "trace/beauty/generated/mount_flags_array.c"
+       static DEFINE_STRARRAY(mount_flags);
+
+       return strarray__scnprintf_flags(&strarray__mount_flags, bf, size, flags);
+}
+
+unsigned long syscall_arg__mask_val_mount_flags(struct syscall_arg *arg __maybe_unused, unsigned long flags)
+{
+       // do_mount in fs/namespace.c:
+       /*
+        * Pre-0.97 versions of mount() didn't have a flags word.  When the
+        * flags word was introduced its top half was required to have the
+        * magic value 0xC0ED, and this remained so until 2.4.0-test9.
+        * Therefore, if this magic number is present, it carries no
+        * information and must be discarded.
+        */
+       if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
+               flags &= ~MS_MGC_MSK;
+
+       return flags;
+}
+
+size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+       unsigned long flags = arg->val;
+
+       return mount__scnprintf_flags(flags, bf, size);
+}
diff --git a/tools/perf/trace/beauty/mount_flags.sh b/tools/perf/trace/beauty/mount_flags.sh
new file mode 100755 (executable)
index 0000000..4554757
--- /dev/null
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
+
+printf "static const char *mount_flags[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*'
+egrep $regex ${header_dir}/fs.h | egrep -v '(MSK|VERBOSE|MGC_VAL)\>' | \
+       sed -r "s/$regex/\2 \2 \1/g" | sort -n | \
+       xargs printf "\t[%s ? (ilog2(%s) + 1) : 0] = \"%s\",\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+\(1<<([[:digit:]]+)\)[[:space:]]*.*'
+egrep $regex ${header_dir}/fs.h | \
+       sed -r "s/$regex/\2 \1/g" | \
+       xargs printf "\t[%s + 1] = \"%s\",\n"
+printf "};\n"
index c064d6aae659707712f2b0c177c41e0bec66e70d..1b9d6306d2749b189ecd7315aa4de34331d9bb7e 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/socket.h>
 
index 6aec6178a99dcd8ecd3d01eee8f4969d5d8387bf..cc673fec9184d659c6af961f0753b06a0d0dea1b 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
index 2bafd7c995ffffe394880904b78c77ff73a1b6dc..981185c1974ba58dd2df5eb65b1efeef2c6ae211 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #ifndef PERF_FLAG_FD_NO_GROUP
 # define PERF_FLAG_FD_NO_GROUP         (1UL << 0)
 #endif
index 6492c74df928df48bca7cf49214b272eefbe13ac..9aabd9743ef6e0b34c4492c4936fda36faad08a3 100755 (executable)
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
index 0313df34283040de8648cdbba73ae5d174354b36..1a6acc46807bca7e73f395c37e62a50ed8290773 100644 (file)
@@ -1,4 +1,5 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
+
 size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg)
 {
        int pid = arg->val;
index 2ba784a3734adb2fd7088016970c627a508511f8..1b8ed4cac8153ddfc3ff616118bbfd545e48eefb 100644 (file)
@@ -1,40 +1,36 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/pkey_alloc.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
 #include <linux/kernel.h>
 #include <linux/log2.h>
 
-static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size)
+size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, unsigned long flags)
 {
        int i, printed = 0;
 
-#include "trace/beauty/generated/pkey_alloc_access_rights_array.c"
-       static DEFINE_STRARRAY(pkey_alloc_access_rights);
-
-       if (access_rights == 0) {
-               const char *s = strarray__pkey_alloc_access_rights.entries[0];
+       if (flags == 0) {
+               const char *s = sa->entries[0];
                if (s)
                        return scnprintf(bf, size, "%s", s);
                return scnprintf(bf, size, "%d", 0);
        }
 
-       for (i = 1; i < strarray__pkey_alloc_access_rights.nr_entries; ++i) {
-               int bit = 1 << (i - 1);
+       for (i = 1; i < sa->nr_entries; ++i) {
+               unsigned long bit = 1UL << (i - 1);
 
-               if (!(access_rights & bit))
+               if (!(flags & bit))
                        continue;
 
                if (printed != 0)
                        printed += scnprintf(bf + printed, size - printed, "|");
 
-               if (strarray__pkey_alloc_access_rights.entries[i] != NULL)
-                       printed += scnprintf(bf + printed, size - printed, "%s", strarray__pkey_alloc_access_rights.entries[i]);
+               if (sa->entries[i] != NULL)
+                       printed += scnprintf(bf + printed, size - printed, "%s", sa->entries[i]);
                else
                        printed += scnprintf(bf + printed, size - printed, "0x%#", bit);
        }
@@ -42,6 +38,14 @@ static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, s
        return printed;
 }
 
+static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size)
+{
+#include "trace/beauty/generated/pkey_alloc_access_rights_array.c"
+       static DEFINE_STRARRAY(pkey_alloc_access_rights);
+
+       return strarray__scnprintf_flags(&strarray__pkey_alloc_access_rights, bf, size, access_rights);
+}
+
 size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg)
 {
        unsigned long cmd = arg->val;
index e0a51aeb20b21a8cde5eee01336b2b11af4fef7f..f8f1b560cf8a4807d0df7f643f0ba39a7ee5cb4d 100755 (executable)
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/asm-generic/
 
index 246130dad6c413b72ca0c961d635657facfc6c25..be7a5d3959757ec1df7c14e9221c278cb0115e18 100644 (file)
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/prctl.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
index f24722146ebef73061ad4339a639d65ac0929558..d32f8f1124af0aafdb8af94f18e84c1722759295 100755 (executable)
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
index ba5096ae76b60906166df1b2b6ac72f1b85c4a8c..48f2b5c9aa3ed8a3900da69732585883ee701943 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sched.h>
 
 /*
index b7097fd5fed9ee72401fe651f5565b6bdb0f0d88..e36156b19c708d51de58f11396eefdc2aa8f60fe 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #ifndef SECCOMP_SET_MODE_STRICT
 #define SECCOMP_SET_MODE_STRICT 0
 #endif
index bde18a53f090945f9edd7ed517762cc4f5a8de7d..587fec545b8a6e9a43c53dd83a1c6995ce15572a 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <signal.h>
 
 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
index eb511bb5fbd3211697aa1821c66dde2e78f5267e..e0803b9575932420f2db3f47883af51e94e3122b 100755 (executable)
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/
 
index 6818392968b24f130f82896a41f65b15471458f0..7a464a7bf91399bf6683022f330abdacbe71bfc9 100755 (executable)
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/
 
index 71a79f72d9d929fdf48541b63477aafdc813d2b8..9410ad230f10144e77633f7c03c7b5c212f543fb 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 // Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 
 #include "trace/beauty/beauty.h"
index 65227269384b14aa7a10f7f91008d0e3acdfe99b..d971a2596417473fb5398ab0e0114220a6faf2ce 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/socket.c
  *
index a3cc24633bec53d272b90bc08d7d31b61b67f55f..de0f2f29017f2edbf09b236d9b9fdd6830bbae1b 100755 (executable)
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
index bca26aef4a77a8a72d75841673c35ee568e24e54..a63a9a332aa0f89a926ce166b98950f45f3bac4f 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/socket.h>
 
index 5643b692af4cf60c6a791a148e1c35b30567d002..630f2760dd6667409c31d3b31cd9c0b65ad7e705 100644 (file)
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/statx.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
index 0f6a5197d0bede8456bcf00d702ff4a51bf39b9d..439773daaf77db263edf4b7299858cc4e074b6ee 100755 (executable)
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
index 8465281a093de02aa3e7e026e35eeb688634929a..42ff58ad613b8ac8557d7ddb36078a42f84a817f 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/wait.h>
 
index 28cd6a17491b2077815ce0d9bb86f741f7a2be6e..6936daf89dddcd61823fa95582eddd7915b14752 100644 (file)
@@ -139,6 +139,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i
 #include "arch/x86/annotate/instructions.c"
 #include "arch/powerpc/annotate/instructions.c"
 #include "arch/s390/annotate/instructions.c"
+#include "arch/sparc/annotate/instructions.c"
 
 static struct arch architectures[] = {
        {
@@ -170,6 +171,13 @@ static struct arch architectures[] = {
                        .comment_char = '#',
                },
        },
+       {
+               .name = "sparc",
+               .init = sparc__annotate_init,
+               .objdump = {
+                       .comment_char = '#',
+               },
+       },
 };
 
 static void ins__delete(struct ins_operands *ops)
index c4617bcfd521f0ecdcf1dd3b5216014419dcdc2a..72d5ba2479bf19ba1ec9e92d576fc3b8db953ca9 100644 (file)
@@ -962,16 +962,23 @@ s64 perf_event__process_auxtrace(struct perf_session *session,
 #define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ     64
 #define PERF_ITRACE_MAX_LAST_BRANCH_SZ         1024
 
-void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
+                                   bool no_sample)
 {
-       synth_opts->instructions = true;
        synth_opts->branches = true;
        synth_opts->transactions = true;
        synth_opts->ptwrites = true;
        synth_opts->pwr_events = true;
        synth_opts->errors = true;
-       synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
-       synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+       if (no_sample) {
+               synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
+               synth_opts->period = 1;
+               synth_opts->calls = true;
+       } else {
+               synth_opts->instructions = true;
+               synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+               synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+       }
        synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
        synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
        synth_opts->initial_skip = 0;
@@ -999,7 +1006,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
        }
 
        if (!str) {
-               itrace_synth_opts__set_default(synth_opts);
+               itrace_synth_opts__set_default(synth_opts, false);
                return 0;
        }
 
index d88f6e9eb4611ab7344eb480a11c14bd2c79afe7..8e50f96d4b23de86e4939d7276e024f7c353aa81 100644 (file)
@@ -58,6 +58,7 @@ enum itrace_period_type {
 /**
  * struct itrace_synth_opts - AUX area tracing synthesis options.
  * @set: indicates whether or not options have been set
+ * @default_no_sample: Default to no sampling.
  * @inject: indicates the event (not just the sample) must be fully synthesized
  *          because 'perf inject' will write it out
  * @instructions: whether to synthesize 'instructions' events
@@ -82,6 +83,7 @@ enum itrace_period_type {
  */
 struct itrace_synth_opts {
        bool                    set;
+       bool                    default_no_sample;
        bool                    inject;
        bool                    instructions;
        bool                    branches;
@@ -528,7 +530,8 @@ int perf_event__process_auxtrace_error(struct perf_session *session,
                                       union perf_event *event);
 int itrace_parse_synth_opts(const struct option *opt, const char *str,
                            int unset);
-void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts);
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
+                                   bool no_sample);
 
 size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp);
 void perf_session__auxtrace_error_inc(struct perf_session *session,
index 2ae640257fdbbe897d8c398d1dfeeb406d1c9fb1..73430b73570d51f9f2f96131208ac5f2022dee37 100644 (file)
@@ -244,6 +244,27 @@ static void cs_etm__free(struct perf_session *session)
        zfree(&aux);
 }
 
+static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
+{
+       struct machine *machine;
+
+       machine = etmq->etm->machine;
+
+       if (address >= etmq->etm->kernel_start) {
+               if (machine__is_host(machine))
+                       return PERF_RECORD_MISC_KERNEL;
+               else
+                       return PERF_RECORD_MISC_GUEST_KERNEL;
+       } else {
+               if (machine__is_host(machine))
+                       return PERF_RECORD_MISC_USER;
+               else if (perf_guest)
+                       return PERF_RECORD_MISC_GUEST_USER;
+               else
+                       return PERF_RECORD_MISC_HYPERVISOR;
+       }
+}
+
 static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
                              size_t size, u8 *buffer)
 {
@@ -258,10 +279,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
                return -1;
 
        machine = etmq->etm->machine;
-       if (address >= etmq->etm->kernel_start)
-               cpumode = PERF_RECORD_MISC_KERNEL;
-       else
-               cpumode = PERF_RECORD_MISC_USER;
+       cpumode = cs_etm__cpu_mode(etmq, address);
 
        thread = etmq->thread;
        if (!thread) {
@@ -653,7 +671,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
        struct perf_sample sample = {.ip = 0,};
 
        event->sample.header.type = PERF_RECORD_SAMPLE;
-       event->sample.header.misc = PERF_RECORD_MISC_USER;
+       event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
        event->sample.header.size = sizeof(struct perf_event_header);
 
        sample.ip = addr;
@@ -665,7 +683,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
        sample.cpu = etmq->packet->cpu;
        sample.flags = 0;
        sample.insn_len = 1;
-       sample.cpumode = event->header.misc;
+       sample.cpumode = event->sample.header.misc;
 
        if (etm->synth_opts.last_branch) {
                cs_etm__copy_last_branch_rb(etmq);
@@ -706,12 +724,15 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
                u64                     nr;
                struct branch_entry     entries;
        } dummy_bs;
+       u64 ip;
+
+       ip = cs_etm__last_executed_instr(etmq->prev_packet);
 
        event->sample.header.type = PERF_RECORD_SAMPLE;
-       event->sample.header.misc = PERF_RECORD_MISC_USER;
+       event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
        event->sample.header.size = sizeof(struct perf_event_header);
 
-       sample.ip = cs_etm__last_executed_instr(etmq->prev_packet);
+       sample.ip = ip;
        sample.pid = etmq->pid;
        sample.tid = etmq->tid;
        sample.addr = cs_etm__first_executed_instr(etmq->packet);
@@ -720,7 +741,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
        sample.period = 1;
        sample.cpu = etmq->packet->cpu;
        sample.flags = 0;
-       sample.cpumode = PERF_RECORD_MISC_USER;
+       sample.cpumode = event->sample.header.misc;
 
        /*
         * perf report cannot handle events without a branch stack
@@ -1432,7 +1453,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
        if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
                etm->synth_opts = *session->itrace_synth_opts;
        } else {
-               itrace_synth_opts__set_default(&etm->synth_opts);
+               itrace_synth_opts__set_default(&etm->synth_opts,
+                               session->itrace_synth_opts->default_no_sample);
                etm->synth_opts.callchain = false;
        }
 
index 1f3ccc36853030bc8ea58d35ea19bba8e225f732..d01b8355f4caba9440d0e0d4db4c6f241e801dc7 100644 (file)
@@ -63,6 +63,7 @@ struct perf_env {
        struct numa_node        *numa_nodes;
        struct memory_node      *memory_nodes;
        unsigned long long       memory_bsize;
+       u64                     clockid_res_ns;
 };
 
 extern struct perf_env perf_env;
index bc646185f8d91fe3d339264d6b0ea9925b66554f..e9c108a6b1c34fd2cc60ed5690c3d2fba595647c 100644 (file)
@@ -308,6 +308,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
        event->fork.pid  = tgid;
        event->fork.tid  = pid;
        event->fork.header.type = PERF_RECORD_FORK;
+       event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC;
 
        event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);
 
index be440df296150450b4e99e2ab2c5ab83175d29aa..e88e6f9b1463f0674a0eaf12423b35978e58d8da 100644 (file)
@@ -358,7 +358,7 @@ void perf_evlist__disable(struct perf_evlist *evlist)
        struct perf_evsel *pos;
 
        evlist__for_each_entry(evlist, pos) {
-               if (!perf_evsel__is_group_leader(pos) || !pos->fd)
+               if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd)
                        continue;
                perf_evsel__disable(pos);
        }
index 29d7b97f66fbc5ae8efe67d3880263711d0a4d1a..6d187059a37360ae669220c913c535b1278d6514 100644 (file)
@@ -232,6 +232,7 @@ void perf_evsel__init(struct perf_evsel *evsel,
        evsel->leader      = evsel;
        evsel->unit        = "";
        evsel->scale       = 1.0;
+       evsel->max_events  = ULONG_MAX;
        evsel->evlist      = NULL;
        evsel->bpf_fd      = -1;
        INIT_LIST_HEAD(&evsel->node);
@@ -793,6 +794,9 @@ static void apply_config_terms(struct perf_evsel *evsel,
                case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
                        max_stack = term->val.max_stack;
                        break;
+               case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS:
+                       evsel->max_events = term->val.max_events;
+                       break;
                case PERF_EVSEL__CONFIG_TERM_INHERIT:
                        /*
                         * attr->inherit should has already been set by
@@ -1203,16 +1207,27 @@ int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter)
 
 int perf_evsel__enable(struct perf_evsel *evsel)
 {
-       return perf_evsel__run_ioctl(evsel,
-                                    PERF_EVENT_IOC_ENABLE,
-                                    0);
+       int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0);
+
+       if (!err)
+               evsel->disabled = false;
+
+       return err;
 }
 
 int perf_evsel__disable(struct perf_evsel *evsel)
 {
-       return perf_evsel__run_ioctl(evsel,
-                                    PERF_EVENT_IOC_DISABLE,
-                                    0);
+       int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0);
+       /*
+        * We mark it disabled here so that tools that disable a event can
+        * ignore events after they disable it. I.e. the ring buffer may have
+        * already a few more events queued up before the kernel got the stop
+        * request.
+        */
+       if (!err)
+               evsel->disabled = true;
+
+       return err;
 }
 
 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
index 4107c39f4a54a97c7d3155a34e2814696e4ad21d..3147ca76c6fc24a3c63f59a0368e7976cfa464a4 100644 (file)
@@ -46,6 +46,7 @@ enum term_type {
        PERF_EVSEL__CONFIG_TERM_STACK_USER,
        PERF_EVSEL__CONFIG_TERM_INHERIT,
        PERF_EVSEL__CONFIG_TERM_MAX_STACK,
+       PERF_EVSEL__CONFIG_TERM_MAX_EVENTS,
        PERF_EVSEL__CONFIG_TERM_OVERWRITE,
        PERF_EVSEL__CONFIG_TERM_DRV_CFG,
        PERF_EVSEL__CONFIG_TERM_BRANCH,
@@ -65,6 +66,7 @@ struct perf_evsel_config_term {
                bool    inherit;
                bool    overwrite;
                char    *branch;
+               unsigned long max_events;
        } val;
        bool weak;
 };
@@ -99,6 +101,8 @@ struct perf_evsel {
        struct perf_counts      *prev_raw_counts;
        int                     idx;
        u32                     ids;
+       unsigned long           max_events;
+       unsigned long           nr_events_printed;
        char                    *name;
        double                  scale;
        const char              *unit;
@@ -119,6 +123,7 @@ struct perf_evsel {
        bool                    snapshot;
        bool                    supported;
        bool                    needs_swap;
+       bool                    disabled;
        bool                    no_aux_samples;
        bool                    immediate;
        bool                    system_wide;
index de322d51c7fe2c2a6e821d8e10c9dc4a7f264587..b72440bf9a7967c5e4ca1e305729c10af1aaa0f4 100644 (file)
@@ -29,6 +29,12 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
 #elif defined(__powerpc__)
 #define GEN_ELF_ARCH   EM_PPC
 #define GEN_ELF_CLASS  ELFCLASS32
+#elif defined(__sparc__) && defined(__arch64__)
+#define GEN_ELF_ARCH   EM_SPARCV9
+#define GEN_ELF_CLASS  ELFCLASS64
+#elif defined(__sparc__)
+#define GEN_ELF_ARCH   EM_SPARC
+#define GEN_ELF_CLASS  ELFCLASS32
 #else
 #error "unsupported architecture"
 #endif
index 1ec1d9bc2d6356bf98d053aec68331c21142907a..4fd45be95a433e32ac8839dd6f9b402be6d34b00 100644 (file)
@@ -1034,6 +1034,13 @@ static int write_auxtrace(struct feat_fd *ff,
        return err;
 }
 
+static int write_clockid(struct feat_fd *ff,
+                        struct perf_evlist *evlist __maybe_unused)
+{
+       return do_write(ff, &ff->ph->env.clockid_res_ns,
+                       sizeof(ff->ph->env.clockid_res_ns));
+}
+
 static int cpu_cache_level__sort(const void *a, const void *b)
 {
        struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
@@ -1508,6 +1515,12 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
                fprintf(fp, "# Core ID and Socket ID information is not available\n");
 }
 
+static void print_clockid(struct feat_fd *ff, FILE *fp)
+{
+       fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n",
+               ff->ph->env.clockid_res_ns * 1000);
+}
+
 static void free_event_desc(struct perf_evsel *events)
 {
        struct perf_evsel *evsel;
@@ -2531,6 +2544,15 @@ out:
        return ret;
 }
 
+static int process_clockid(struct feat_fd *ff,
+                          void *data __maybe_unused)
+{
+       if (do_read_u64(ff, &ff->ph->env.clockid_res_ns))
+               return -1;
+
+       return 0;
+}
+
 struct feature_ops {
        int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
        void (*print)(struct feat_fd *ff, FILE *fp);
@@ -2590,6 +2612,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
        FEAT_OPN(CACHE,         cache,          true),
        FEAT_OPR(SAMPLE_TIME,   sample_time,    false),
        FEAT_OPR(MEM_TOPOLOGY,  mem_topology,   true),
+       FEAT_OPR(CLOCKID,       clockid,        false)
 };
 
 struct header_print_data {
index e17903caa71daba074fbaaf04252525beac2df11..0d553ddca0a3049f941d96a0ae0d68b71ea7a49c 100644 (file)
@@ -38,6 +38,7 @@ enum {
        HEADER_CACHE,
        HEADER_SAMPLE_TIME,
        HEADER_MEM_TOPOLOGY,
+       HEADER_CLOCKID,
        HEADER_LAST_FEATURE,
        HEADER_FEAT_BITS        = 256,
 };
index 7f0c83b6332bfd94ca92eac2723c700421cf119a..7b27d77306c229d2478d8ceea9e668a4cee5a24d 100644 (file)
@@ -269,6 +269,13 @@ static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
        return 0;
 }
 
+static inline u8 intel_bts_cpumode(struct intel_bts *bts, uint64_t ip)
+{
+       return machine__kernel_ip(bts->machine, ip) ?
+              PERF_RECORD_MISC_KERNEL :
+              PERF_RECORD_MISC_USER;
+}
+
 static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
                                         struct branch *branch)
 {
@@ -281,12 +288,8 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
            bts->num_events++ <= bts->synth_opts.initial_skip)
                return 0;
 
-       event.sample.header.type = PERF_RECORD_SAMPLE;
-       event.sample.header.misc = PERF_RECORD_MISC_USER;
-       event.sample.header.size = sizeof(struct perf_event_header);
-
-       sample.cpumode = PERF_RECORD_MISC_USER;
        sample.ip = le64_to_cpu(branch->from);
+       sample.cpumode = intel_bts_cpumode(bts, sample.ip);
        sample.pid = btsq->pid;
        sample.tid = btsq->tid;
        sample.addr = le64_to_cpu(branch->to);
@@ -298,6 +301,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
        sample.insn_len = btsq->intel_pt_insn.length;
        memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ);
 
+       event.sample.header.type = PERF_RECORD_SAMPLE;
+       event.sample.header.misc = sample.cpumode;
+       event.sample.header.size = sizeof(struct perf_event_header);
+
        if (bts->synth_opts.inject) {
                event.sample.header.size = bts->branches_event_size;
                ret = perf_event__synthesize_sample(&event,
@@ -910,7 +917,8 @@ int intel_bts_process_auxtrace_info(union perf_event *event,
        if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
                bts->synth_opts = *session->itrace_synth_opts;
        } else {
-               itrace_synth_opts__set_default(&bts->synth_opts);
+               itrace_synth_opts__set_default(&bts->synth_opts,
+                               session->itrace_synth_opts->default_no_sample);
                if (session->itrace_synth_opts)
                        bts->synth_opts.thread_stack =
                                session->itrace_synth_opts->thread_stack;
index 48c1d415c6b069004dacddafdb759f25efb2b176..86cc9a64e982773408e2d51cc25a51a82c396ff5 100644 (file)
@@ -407,6 +407,13 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
        return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
 }
 
+static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
+{
+       return ip >= pt->kernel_start ?
+              PERF_RECORD_MISC_KERNEL :
+              PERF_RECORD_MISC_USER;
+}
+
 static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
                                   uint64_t *insn_cnt_ptr, uint64_t *ip,
                                   uint64_t to_ip, uint64_t max_insn_cnt,
@@ -429,10 +436,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
        if (to_ip && *ip == to_ip)
                goto out_no_cache;
 
-       if (*ip >= ptq->pt->kernel_start)
-               cpumode = PERF_RECORD_MISC_KERNEL;
-       else
-               cpumode = PERF_RECORD_MISC_USER;
+       cpumode = intel_pt_cpumode(ptq->pt, *ip);
 
        thread = ptq->thread;
        if (!thread) {
@@ -759,7 +763,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
        if (pt->synth_opts.callchain) {
                size_t sz = sizeof(struct ip_callchain);
 
-               sz += pt->synth_opts.callchain_sz * sizeof(u64);
+               /* Add 1 to callchain_sz for callchain context */
+               sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
                ptq->chain = zalloc(sz);
                if (!ptq->chain)
                        goto out_free;
@@ -1058,15 +1063,11 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt,
                                   union perf_event *event,
                                   struct perf_sample *sample)
 {
-       event->sample.header.type = PERF_RECORD_SAMPLE;
-       event->sample.header.misc = PERF_RECORD_MISC_USER;
-       event->sample.header.size = sizeof(struct perf_event_header);
-
        if (!pt->timeless_decoding)
                sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
 
-       sample->cpumode = PERF_RECORD_MISC_USER;
        sample->ip = ptq->state->from_ip;
+       sample->cpumode = intel_pt_cpumode(pt, sample->ip);
        sample->pid = ptq->pid;
        sample->tid = ptq->tid;
        sample->addr = ptq->state->to_ip;
@@ -1075,6 +1076,10 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt,
        sample->flags = ptq->flags;
        sample->insn_len = ptq->insn_len;
        memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
+
+       event->sample.header.type = PERF_RECORD_SAMPLE;
+       event->sample.header.misc = sample->cpumode;
+       event->sample.header.size = sizeof(struct perf_event_header);
 }
 
 static int intel_pt_inject_event(union perf_event *event,
@@ -1160,7 +1165,8 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
 
        if (pt->synth_opts.callchain) {
                thread_stack__sample(ptq->thread, ptq->chain,
-                                    pt->synth_opts.callchain_sz, sample->ip);
+                                    pt->synth_opts.callchain_sz + 1,
+                                    sample->ip, pt->kernel_start);
                sample->callchain = ptq->chain;
        }
 
@@ -2559,7 +2565,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
        if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
                pt->synth_opts = *session->itrace_synth_opts;
        } else {
-               itrace_synth_opts__set_default(&pt->synth_opts);
+               itrace_synth_opts__set_default(&pt->synth_opts,
+                               session->itrace_synth_opts->default_no_sample);
                if (use_browser != -1) {
                        pt->synth_opts.branches = false;
                        pt->synth_opts.callchain = true;
index 111ae858cbcbdff402b140c5761f9e29107dc83c..8f36ce813bc5b20308a2f799ba76192118fae3d1 100644 (file)
@@ -1708,6 +1708,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
        struct thread *parent = machine__findnew_thread(machine,
                                                        event->fork.ppid,
                                                        event->fork.ptid);
+       bool do_maps_clone = true;
        int err = 0;
 
        if (dump_trace)
@@ -1736,9 +1737,25 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
 
        thread = machine__findnew_thread(machine, event->fork.pid,
                                         event->fork.tid);
+       /*
+        * When synthesizing FORK events, we are trying to create thread
+        * objects for the already running tasks on the machine.
+        *
+        * Normally, for a kernel FORK event, we want to clone the parent's
+        * maps because that is what the kernel just did.
+        *
+        * But when synthesizing, this should not be done.  If we do, we end up
+        * with overlapping maps as we process the sythesized MMAP2 events that
+        * get delivered shortly thereafter.
+        *
+        * Use the FORK event misc flags in an internal way to signal this
+        * situation, so we can elide the map clone when appropriate.
+        */
+       if (event->fork.header.misc & PERF_RECORD_MISC_FORK_EXEC)
+               do_maps_clone = false;
 
        if (thread == NULL || parent == NULL ||
-           thread__fork(thread, parent, sample->time) < 0) {
+           thread__fork(thread, parent, sample->time, do_maps_clone) < 0) {
                dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
                err = -1;
        }
@@ -2140,6 +2157,27 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
        return 0;
 }
 
+static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
+                            struct callchain_cursor *cursor,
+                            struct symbol **parent,
+                            struct addr_location *root_al,
+                            u8 *cpumode, int ent)
+{
+       int err = 0;
+
+       while (--ent >= 0) {
+               u64 ip = chain->ips[ent];
+
+               if (ip >= PERF_CONTEXT_MAX) {
+                       err = add_callchain_ip(thread, cursor, parent,
+                                              root_al, cpumode, ip,
+                                              false, NULL, NULL, 0);
+                       break;
+               }
+       }
+       return err;
+}
+
 static int thread__resolve_callchain_sample(struct thread *thread,
                                            struct callchain_cursor *cursor,
                                            struct perf_evsel *evsel,
@@ -2246,6 +2284,12 @@ static int thread__resolve_callchain_sample(struct thread *thread,
        }
 
 check_calls:
+       if (callchain_param.order != ORDER_CALLEE) {
+               err = find_prev_cpumode(chain, thread, cursor, parent, root_al,
+                                       &cpumode, chain->nr - first_call);
+               if (err)
+                       return (err < 0) ? err : 0;
+       }
        for (i = first_call, nr_entries = 0;
             i < chain_nr && nr_entries < max_stack; i++) {
                u64 ip;
@@ -2260,9 +2304,15 @@ check_calls:
                        continue;
 #endif
                ip = chain->ips[j];
-
                if (ip < PERF_CONTEXT_MAX)
                        ++nr_entries;
+               else if (callchain_param.order != ORDER_CALLEE) {
+                       err = find_prev_cpumode(chain, thread, cursor, parent,
+                                               root_al, &cpumode, j);
+                       if (err)
+                               return (err < 0) ? err : 0;
+                       continue;
+               }
 
                err = add_callchain_ip(thread, cursor, parent,
                                       root_al, &cpumode, ip,
index f8cd3e7c918668cc1f593b539c6648d16b091726..59be3466d64d329ae50b9d719836f5ddbb780832 100644 (file)
@@ -926,6 +926,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
        [PARSE_EVENTS__TERM_TYPE_NOINHERIT]             = "no-inherit",
        [PARSE_EVENTS__TERM_TYPE_INHERIT]               = "inherit",
        [PARSE_EVENTS__TERM_TYPE_MAX_STACK]             = "max-stack",
+       [PARSE_EVENTS__TERM_TYPE_MAX_EVENTS]            = "nr",
        [PARSE_EVENTS__TERM_TYPE_OVERWRITE]             = "overwrite",
        [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE]           = "no-overwrite",
        [PARSE_EVENTS__TERM_TYPE_DRV_CFG]               = "driver-config",
@@ -1037,6 +1038,9 @@ do {                                                                         \
        case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
                CHECK_TYPE_VAL(NUM);
                break;
+       case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+               CHECK_TYPE_VAL(NUM);
+               break;
        default:
                err->str = strdup("unknown term");
                err->idx = term->err_term;
@@ -1084,6 +1088,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
        case PARSE_EVENTS__TERM_TYPE_INHERIT:
        case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
        case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+       case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
        case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
        case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
                return config_term_common(attr, term, err);
@@ -1162,6 +1167,9 @@ do {                                                              \
                case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
                        ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num);
                        break;
+               case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+                       ADD_CONFIG_TERM(MAX_EVENTS, max_events, term->val.num);
+                       break;
                case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
                        ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0);
                        break;
index 4473dac27aee254fd6752cb06d9b7877a1ffdaeb..5ed035cbcbb72dcbcf5c73d39be2248c099e7452 100644 (file)
@@ -71,6 +71,7 @@ enum {
        PARSE_EVENTS__TERM_TYPE_NOINHERIT,
        PARSE_EVENTS__TERM_TYPE_INHERIT,
        PARSE_EVENTS__TERM_TYPE_MAX_STACK,
+       PARSE_EVENTS__TERM_TYPE_MAX_EVENTS,
        PARSE_EVENTS__TERM_TYPE_NOOVERWRITE,
        PARSE_EVENTS__TERM_TYPE_OVERWRITE,
        PARSE_EVENTS__TERM_TYPE_DRV_CFG,
index 5f761f3ed0f3333fba455be4e2fa447282005e51..7805c71aaae2e53dbc74c072b4e5eb2a73e6c23a 100644 (file)
@@ -269,6 +269,7 @@ time                        { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
 call-graph             { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
 stack-size             { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
 max-stack              { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); }
+nr                     { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_EVENTS); }
 inherit                        { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); }
 no-inherit             { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
 overwrite              { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); }
index 0281d5e2cd6703d0d0d34562a602b8d780b88926..66a84d5846c88ed912aff027943c6f8e9ff78ff2 100644 (file)
@@ -324,7 +324,17 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss)
                        plt_entry_size = 16;
                        break;
 
-               default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa need to be checked */
+               case EM_SPARC:
+                       plt_header_size = 48;
+                       plt_entry_size = 12;
+                       break;
+
+               case EM_SPARCV9:
+                       plt_header_size = 128;
+                       plt_entry_size = 32;
+                       break;
+
+               default: /* FIXME: s390/alpha/mips/parisc/powerpc/sh/xtensa need to be checked */
                        plt_header_size = shdr_plt.sh_entsize;
                        plt_entry_size = shdr_plt.sh_entsize;
                        break;
index 20f49779116bd3ad7b98991688cc1cf4201e444c..d026d215bdc63244638c26e4ac4586fa8fe3a1cb 100644 (file)
@@ -123,7 +123,8 @@ struct symbol_conf {
        const char      *vmlinux_name,
                        *kallsyms_name,
                        *source_prefix,
-                       *field_sep;
+                       *field_sep,
+                       *graph_function;
        const char      *default_guest_vmlinux_name,
                        *default_guest_kallsyms,
                        *default_guest_modules;
index c091635bf7dcb317d66f1ab8273e5cc90678c5e1..61a4286a74dc9f86c333036013d5abe914b81b3f 100644 (file)
@@ -310,20 +310,46 @@ void thread_stack__free(struct thread *thread)
        }
 }
 
+static inline u64 callchain_context(u64 ip, u64 kernel_start)
+{
+       return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
+}
+
 void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
-                         size_t sz, u64 ip)
+                         size_t sz, u64 ip, u64 kernel_start)
 {
-       size_t i;
+       u64 context = callchain_context(ip, kernel_start);
+       u64 last_context;
+       size_t i, j;
 
-       if (!thread || !thread->ts)
-               chain->nr = 1;
-       else
-               chain->nr = min(sz, thread->ts->cnt + 1);
+       if (sz < 2) {
+               chain->nr = 0;
+               return;
+       }
 
-       chain->ips[0] = ip;
+       chain->ips[0] = context;
+       chain->ips[1] = ip;
+
+       if (!thread || !thread->ts) {
+               chain->nr = 2;
+               return;
+       }
+
+       last_context = context;
+
+       for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) {
+               ip = thread->ts->stack[thread->ts->cnt - j].ret_addr;
+               context = callchain_context(ip, kernel_start);
+               if (context != last_context) {
+                       if (i >= sz - 1)
+                               break;
+                       chain->ips[i++] = context;
+                       last_context = context;
+               }
+               chain->ips[i] = ip;
+       }
 
-       for (i = 1; i < chain->nr; i++)
-               chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
+       chain->nr = i;
 }
 
 struct call_return_processor *
index b7e41c4ebfdd98ec038f823e70051a49c3fa6a62..f97c00a8c2514dcc102f8e25d32e474815e2b030 100644 (file)
@@ -84,7 +84,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
                        u64 to_ip, u16 insn_len, u64 trace_nr);
 void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
 void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
-                         size_t sz, u64 ip);
+                         size_t sz, u64 ip, u64 kernel_start);
 int thread_stack__flush(struct thread *thread);
 void thread_stack__free(struct thread *thread);
 size_t thread_stack__depth(struct thread *thread);
index 2048d393ece6f24b19f896e75173420f47d22e48..3d9ed7d0e2818f3aedea795b4083db31a22f5de6 100644 (file)
@@ -330,7 +330,8 @@ static int thread__prepare_access(struct thread *thread)
 }
 
 static int thread__clone_map_groups(struct thread *thread,
-                                   struct thread *parent)
+                                   struct thread *parent,
+                                   bool do_maps_clone)
 {
        /* This is new thread, we share map groups for process. */
        if (thread->pid_ == parent->pid_)
@@ -341,15 +342,11 @@ static int thread__clone_map_groups(struct thread *thread,
                         thread->pid_, thread->tid, parent->pid_, parent->tid);
                return 0;
        }
-
        /* But this one is new process, copy maps. */
-       if (map_groups__clone(thread, parent->mg) < 0)
-               return -ENOMEM;
-
-       return 0;
+       return do_maps_clone ? map_groups__clone(thread, parent->mg) : 0;
 }
 
-int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone)
 {
        if (parent->comm_set) {
                const char *comm = thread__comm_str(parent);
@@ -362,7 +359,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
        }
 
        thread->ppid = parent->tid;
-       return thread__clone_map_groups(thread, parent);
+       return thread__clone_map_groups(thread, parent, do_maps_clone);
 }
 
 void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
index 07606aa6998d92252b7d63a4632750d9531356f2..30e2b4c165fe7341332b71365141c5f209ec23bd 100644 (file)
@@ -42,6 +42,8 @@ struct thread {
        void                            *addr_space;
        struct unwind_libunwind_ops     *unwind_libunwind_ops;
 #endif
+       bool                    filter;
+       int                     filter_entry_depth;
 };
 
 struct machine;
@@ -87,7 +89,7 @@ struct comm *thread__comm(const struct thread *thread);
 struct comm *thread__exec_comm(const struct thread *thread);
 const char *thread__comm_str(const struct thread *thread);
 int thread__insert_map(struct thread *thread, struct map *map);
-int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp);
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone);
 size_t thread__fprintf(struct thread *thread, FILE *fp);
 
 struct thread *thread__main_thread(struct machine *machine, struct thread *thread);
index 6f318b15950e8e539f60ac11b3b7606007b496c3..5eff9bfc575836e0c6efd70cc82ee8f5a69bc283 100644 (file)
@@ -45,13 +45,13 @@ static int __report_module(struct addr_location *al, u64 ip,
                Dwarf_Addr s;
 
                dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
-               if (s != al->map->start)
+               if (s != al->map->start - al->map->pgoff)
                        mod = 0;
        }
 
        if (!mod)
                mod = dwfl_report_elf(ui->dwfl, dso->short_name,
-                                     (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start,
+                                     (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start - al->map->pgoff,
                                      false);
 
        return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1;
index d3273b5b3173e123d087c624a34f5b834395da54..ae8180b11d5fe6dde18f822a262405e07479bc68 100644 (file)
@@ -11,6 +11,8 @@
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 
+#include "bpf_rlimit.h"
+
 const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector";
 const char *cfg_map_name = "jmp_table";
 bool cfg_attach = true;
index 42544a969abc63b734267e696c78d2b338be0ca7..a9bc6f82abc163d47876d4af1f2dcbe540a6c814 100755 (executable)
@@ -10,7 +10,7 @@ wait_for_ip()
        echo -n "Wait for testing link-local IP to become available "
        for _i in $(seq ${MAX_PING_TRIES}); do
                echo -n "."
-               if ping -6 -q -c 1 -W 1 ff02::1%${TEST_IF} >/dev/null 2>&1; then
+               if $PING6 -c 1 -W 1 ff02::1%${TEST_IF} >/dev/null 2>&1; then
                        echo " OK"
                        return
                fi
@@ -58,5 +58,6 @@ BPF_PROG_OBJ="${DIR}/test_skb_cgroup_id_kern.o"
 BPF_PROG_SECTION="cgroup_id_logger"
 BPF_PROG_ID=0
 PROG="${DIR}/test_skb_cgroup_id_user"
+type ping6 >/dev/null 2>&1 && PING6="ping6" || PING6="ping -6"
 
 main
index 9832a875a828979be26b8756cbd589d5cf5e44f1..3b9fdb8094aa28b5e5abbe99f15b1e4502869a68 100755 (executable)
@@ -4,7 +4,8 @@ set -eu
 
 ping_once()
 {
-       ping -${1} -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1
+       type ping${1} >/dev/null 2>&1 && PING="ping${1}" || PING="ping -${1}"
+       $PING -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1
 }
 
 wait_for_ip()
index 36f3d3009d1a079e57032b47b57bc306d045a3a8..6f61df62f690c69adceb2c328b231ab39b8b9c49 100644 (file)
@@ -76,7 +76,7 @@ struct bpf_test {
        int fixup_percpu_cgroup_storage[MAX_FIXUPS];
        const char *errstr;
        const char *errstr_unpriv;
-       uint32_t retval;
+       uint32_t retval, retval_unpriv;
        enum {
                UNDEF,
                ACCEPT,
@@ -3084,6 +3084,8 @@ static struct bpf_test tests[] = {
                .fixup_prog1 = { 2 },
                .result = ACCEPT,
                .retval = 42,
+               /* Verifier rewrite for unpriv skips tail call here. */
+               .retval_unpriv = 2,
        },
        {
                "stack pointer arithmetic",
@@ -6454,6 +6456,256 @@ static struct bpf_test tests[] = {
                .errstr = "R1 min value is negative",
                .prog_type = BPF_PROG_TYPE_TRACEPOINT,
        },
+       {
+               "map access: known scalar += value_ptr",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+                       BPF_MOV64_IMM(BPF_REG_1, 4),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map_array_48b = { 3 },
+               .result = ACCEPT,
+               .retval = 1,
+       },
+       {
+               "map access: value_ptr += known scalar",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+                       BPF_MOV64_IMM(BPF_REG_1, 4),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map_array_48b = { 3 },
+               .result = ACCEPT,
+               .retval = 1,
+       },
+       {
+               "map access: unknown scalar += value_ptr",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map_array_48b = { 3 },
+               .result = ACCEPT,
+               .retval = 1,
+       },
+       {
+               "map access: value_ptr += unknown scalar",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map_array_48b = { 3 },
+               .result = ACCEPT,
+               .retval = 1,
+       },
+       {
+               "map access: value_ptr += value_ptr",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_0),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map_array_48b = { 3 },
+               .result = REJECT,
+               .errstr = "R0 pointer += pointer prohibited",
+       },
+       {
+               "map access: known scalar -= value_ptr",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+                       BPF_MOV64_IMM(BPF_REG_1, 4),
+                       BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map_array_48b = { 3 },
+               .result = REJECT,
+               .errstr = "R1 tried to subtract pointer from scalar",
+       },
+       {
+               "map access: value_ptr -= known scalar",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+                       BPF_MOV64_IMM(BPF_REG_1, 4),
+                       BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map_array_48b = { 3 },
+               .result = REJECT,
+               .errstr = "R0 min value is outside of the array range",
+       },
+       {
+               "map access: value_ptr -= known scalar, 2",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+                       BPF_MOV64_IMM(BPF_REG_1, 6),
+                       BPF_MOV64_IMM(BPF_REG_2, 4),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map_array_48b = { 3 },
+               .result = ACCEPT,
+               .retval = 1,
+       },
+       {
+               "map access: unknown scalar -= value_ptr",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+                       BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map_array_48b = { 3 },
+               .result = REJECT,
+               .errstr = "R1 tried to subtract pointer from scalar",
+       },
+       {
+               "map access: value_ptr -= unknown scalar",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+                       BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map_array_48b = { 3 },
+               .result = REJECT,
+               .errstr = "R0 min value is negative",
+       },
+       {
+               "map access: value_ptr -= unknown scalar, 2",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+                       BPF_ALU64_IMM(BPF_OR, BPF_REG_1, 0x7),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+                       BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map_array_48b = { 3 },
+               .result = ACCEPT,
+               .retval = 1,
+       },
+       {
+               "map access: value_ptr -= value_ptr",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+                       BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_0),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map_array_48b = { 3 },
+               .result = REJECT,
+               .errstr = "R0 invalid mem access 'inv'",
+               .errstr_unpriv = "R0 pointer -= pointer prohibited",
+       },
        {
                "map lookup helper access to map",
                .insns = {
@@ -13899,6 +14151,33 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type,
        }
 }
 
+static int set_admin(bool admin)
+{
+       cap_t caps;
+       const cap_value_t cap_val = CAP_SYS_ADMIN;
+       int ret = -1;
+
+       caps = cap_get_proc();
+       if (!caps) {
+               perror("cap_get_proc");
+               return -1;
+       }
+       if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
+                               admin ? CAP_SET : CAP_CLEAR)) {
+               perror("cap_set_flag");
+               goto out;
+       }
+       if (cap_set_proc(caps)) {
+               perror("cap_set_proc");
+               goto out;
+       }
+       ret = 0;
+out:
+       if (cap_free(caps))
+               perror("cap_free");
+       return ret;
+}
+
 static void do_test_single(struct bpf_test *test, bool unpriv,
                           int *passes, int *errors)
 {
@@ -13907,6 +14186,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
        struct bpf_insn *prog = test->insns;
        int map_fds[MAX_NR_MAPS];
        const char *expected_err;
+       uint32_t expected_val;
        uint32_t retval;
        int i, err;
 
@@ -13926,6 +14206,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
                       test->result_unpriv : test->result;
        expected_err = unpriv && test->errstr_unpriv ?
                       test->errstr_unpriv : test->errstr;
+       expected_val = unpriv && test->retval_unpriv ?
+                      test->retval_unpriv : test->retval;
 
        reject_from_alignment = fd_prog < 0 &&
                                (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) &&
@@ -13959,16 +14241,20 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
                __u8 tmp[TEST_DATA_LEN << 2];
                __u32 size_tmp = sizeof(tmp);
 
+               if (unpriv)
+                       set_admin(true);
                err = bpf_prog_test_run(fd_prog, 1, test->data,
                                        sizeof(test->data), tmp, &size_tmp,
                                        &retval, NULL);
+               if (unpriv)
+                       set_admin(false);
                if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) {
                        printf("Unexpected bpf_prog_test_run error\n");
                        goto fail_log;
                }
-               if (!err && retval != test->retval &&
-                   test->retval != POINTER_VALUE) {
-                       printf("FAIL retval %d != %d\n", retval, test->retval);
+               if (!err && retval != expected_val &&
+                   expected_val != POINTER_VALUE) {
+                       printf("FAIL retval %d != %d\n", retval, expected_val);
                        goto fail_log;
                }
        }
@@ -14011,33 +14297,6 @@ static bool is_admin(void)
        return (sysadmin == CAP_SET);
 }
 
-static int set_admin(bool admin)
-{
-       cap_t caps;
-       const cap_value_t cap_val = CAP_SYS_ADMIN;
-       int ret = -1;
-
-       caps = cap_get_proc();
-       if (!caps) {
-               perror("cap_get_proc");
-               return -1;
-       }
-       if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
-                               admin ? CAP_SET : CAP_CLEAR)) {
-               perror("cap_set_flag");
-               goto out;
-       }
-       if (cap_set_proc(caps)) {
-               perror("cap_set_proc");
-               goto out;
-       }
-       ret = 0;
-out:
-       if (cap_free(caps))
-               perror("cap_free");
-       return ret;
-}
-
 static void get_unpriv_disabled()
 {
        char buf[2];
index 0150bb2741eb1a0f2a3c219e7316b37753a17298..117f6f35d72fac57ad87f0ed4772a50511d45cb4 100755 (executable)
 # Thus we set MTU to 10K on all involved interfaces. Then both unicast and
 # multicast traffic uses 8K frames.
 #
-# +-----------------------+                +----------------------------------+
-# | H1                    |                |                               H2 |
-# |                       |                |  unicast --> + $h2.111           |
-# |                       |                |  traffic     | 192.0.2.129/28    |
-# |          multicast    |                |              | e-qos-map 0:1     |
-# |          traffic      |                |              |                   |
-# | $h1 + <-----          |                |              + $h2               |
-# +-----|-----------------+                +--------------|-------------------+
-#       |                                                 |
-# +-----|-------------------------------------------------|-------------------+
-# |     + $swp1                                           + $swp2             |
-# |     | >1Gbps                                          | >1Gbps            |
-# | +---|----------------+                     +----------|----------------+  |
-# | |   + $swp1.1        |                     |          + $swp2.111      |  |
+# +---------------------------+            +----------------------------------+
+# | H1                        |            |                               H2 |
+# |                           |            |  unicast --> + $h2.111           |
+# |                 multicast |            |  traffic     | 192.0.2.129/28    |
+# |                 traffic   |            |              | e-qos-map 0:1     |
+# |           $h1 + <-----    |            |              |                   |
+# | 192.0.2.65/28 |           |            |              + $h2               |
+# +---------------|-----------+            +--------------|-------------------+
+#                 |                                       |
+# +---------------|---------------------------------------|-------------------+
+# |         $swp1 +                                       + $swp2             |
+# |        >1Gbps |                                       | >1Gbps            |
+# | +-------------|------+                     +----------|----------------+  |
+# | |     $swp1.1 +      |                     |          + $swp2.111      |  |
 # | |                BR1 |             SW      | BR111                     |  |
-# | |   + $swp3.1        |                     |          + $swp3.111      |  |
-# | +---|----------------+                     +----------|----------------+  |
-# |     \_________________________________________________/                   |
+# | |     $swp3.1 +      |                     |          + $swp3.111      |  |
+# | +-------------|------+                     +----------|----------------+  |
+# |               \_______________________________________/                   |
 # |                                    |                                      |
 # |                                    + $swp3                                |
 # |                                    | 1Gbps bottleneck                     |
@@ -51,6 +51,7 @@
 #                                      |
 #                                   +--|-----------------+
 #                                   |  + $h3          H3 |
+#                                   |  | 192.0.2.66/28   |
 #                                   |  |                 |
 #                                   |  + $h3.111         |
 #                                   |    192.0.2.130/28  |
@@ -59,6 +60,7 @@
 ALL_TESTS="
        ping_ipv4
        test_mc_aware
+       test_uc_aware
 "
 
 lib_dir=$(dirname $0)/../../../net/forwarding
@@ -68,14 +70,14 @@ source $lib_dir/lib.sh
 
 h1_create()
 {
-       simple_if_init $h1
+       simple_if_init $h1 192.0.2.65/28
        mtu_set $h1 10000
 }
 
 h1_destroy()
 {
        mtu_restore $h1
-       simple_if_fini $h1
+       simple_if_fini $h1 192.0.2.65/28
 }
 
 h2_create()
@@ -97,7 +99,7 @@ h2_destroy()
 
 h3_create()
 {
-       simple_if_init $h3
+       simple_if_init $h3 192.0.2.66/28
        mtu_set $h3 10000
 
        vlan_create $h3 111 v$h3 192.0.2.130/28
@@ -108,7 +110,7 @@ h3_destroy()
        vlan_destroy $h3 111
 
        mtu_restore $h3
-       simple_if_fini $h3
+       simple_if_fini $h3 192.0.2.66/28
 }
 
 switch_create()
@@ -251,7 +253,7 @@ measure_uc_rate()
        # average ingress rate to somewhat mitigate this.
        local min_ingress=2147483648
 
-       mausezahn $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
+       $MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
                -a own -b $h3mac -t udp -q &
        sleep 1
 
@@ -291,7 +293,7 @@ test_mc_aware()
        check_err $? "Could not get high enough UC-only ingress rate"
        local ucth1=${uc_rate[1]}
 
-       mausezahn $h1 -p 8000 -c 0 -a own -b bc -t udp -q &
+       $MZ $h1 -p 8000 -c 0 -a own -b bc -t udp -q &
 
        local d0=$(date +%s)
        local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
@@ -311,7 +313,7 @@ test_mc_aware()
                        ret = 100 * ($ucth1 - $ucth2) / $ucth1
                        if (ret > 0) { ret } else { 0 }
                    ")
-       check_err $(bc <<< "$deg > 10")
+       check_err $(bc <<< "$deg > 25")
 
        local interval=$((d1 - d0))
        local mc_ir=$(rate $u0 $u1 $interval)
@@ -335,6 +337,51 @@ test_mc_aware()
        echo "    egress UC throughput  $(humanize ${uc_rate_2[1]})"
        echo "    ingress MC throughput $(humanize $mc_ir)"
        echo "    egress MC throughput  $(humanize $mc_er)"
+       echo
+}
+
+test_uc_aware()
+{
+       RET=0
+
+       $MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
+               -a own -b $h3mac -t udp -q &
+
+       local d0=$(date +%s)
+       local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
+       local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+       sleep 1
+
+       local attempts=50
+       local passes=0
+       local i
+
+       for ((i = 0; i < attempts; ++i)); do
+               if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 0.1; then
+                       ((passes++))
+               fi
+
+               sleep 0.1
+       done
+
+       local d1=$(date +%s)
+       local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
+       local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+
+       local interval=$((d1 - d0))
+       local uc_ir=$(rate $u0 $u1 $interval)
+       local uc_er=$(rate $t0 $t1 $interval)
+
+       ((attempts == passes))
+       check_err $?
+
+       # Suppress noise from killing mausezahn.
+       { kill %% && wait; } 2>/dev/null
+
+       log_test "MC performance under UC overload"
+       echo "    ingress UC throughput $(humanize ${uc_ir})"
+       echo "    egress UC throughput  $(humanize ${uc_er})"
+       echo "    sent $attempts BC ARPs, got $passes responses"
 }
 
 trap cleanup EXIT
index ede4d3dae7505ef31f822bac9b613aef464f6125..689f6c8ebcd8d649055069a342e98728433fe2e7 100644 (file)
@@ -1,12 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := cache_shape
-
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c ../utils.c
+TEST_GEN_PROGS := cache_shape
 
 top_srcdir = ../../../../..
 include ../../lib.mk
 
-clean:
-       rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
index bd5dfa509272a75b97b1dbf8ac2be00c0a1ab1cc..23f4caf48ffc6b6b84d00ac212ea6352eba4f723 100644 (file)
@@ -5,6 +5,9 @@ noarg:
 # The EBB handler is 64-bit code and everything links against it
 CFLAGS += -m64
 
+# Toolchains may build PIE by default which breaks the assembly
+LDFLAGS += -no-pie
+
 TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test    \
         cycles_with_freeze_test pmc56_overflow_test            \
         ebb_vs_cpu_event_test cpu_event_vs_ebb_test            \
index 9b35ca8e8f13d4bbe77ba24d3e061031071c39c3..8d3f006c98cc39d608f6d7a35acecd3e6a16b971 100644 (file)
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
+TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
               ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
               ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \
               perf-hwbreak ptrace-syscall
@@ -7,14 +7,9 @@ TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
 top_srcdir = ../../../../..
 include ../../lib.mk
 
-all: $(TEST_PROGS)
-
 CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie
 
-ptrace-pkey core-pkey: child.h
-ptrace-pkey core-pkey: LDLIBS += -pthread
-
-$(TEST_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
+$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: child.h
+$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread
 
-clean:
-       rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
index 327fa943c7f3663f9b305a4a1c4f3b572d866b3f..dbdffa2e2c8248f39652c6803245452926ed1790 100644 (file)
@@ -67,8 +67,8 @@ trans:
                "3: ;"
                : [res] "=r" (result), [texasr] "=r" (texasr)
                : [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2), [gpr_4]"i"(GPR_4),
-               [sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "r" (&a),
-               [flt_2] "r" (&b), [flt_4] "r" (&d)
+               [sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
+               [flt_4] "b" (&d)
                : "memory", "r5", "r6", "r7",
                "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
                "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
index 44690f1bb26ae17230e146798efd80aeee155b28..85861c46b4457db2636c6f46cb551958cdfb73eb 100644 (file)
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0+
 
 TEST_GEN_PROGS := rfi_flush
+top_srcdir = ../../../../..
 
 CFLAGS += -I../../../../../usr/include
 
index 564ed45bbf731e2cbbed7e04967a22a6d00fb01e..0a7d0afb26b88529406fe2543dde1d5a6e7a1092 100644 (file)
@@ -49,6 +49,7 @@ int rfi_flush_test(void)
        struct perf_event_read v;
        __u64 l1d_misses_total = 0;
        unsigned long iterations = 100000, zero_size = 24 * 1024;
+       unsigned long l1d_misses_expected;
        int rfi_flush_org, rfi_flush;
 
        SKIP_IF(geteuid() != 0);
@@ -71,6 +72,12 @@ int rfi_flush_test(void)
 
        iter = repetitions;
 
+       /*
+        * We expect to see l1d miss for each cacheline access when rfi_flush
+        * is set. Allow a small variation on this.
+        */
+       l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2);
+
 again:
        FAIL_IF(perf_event_reset(fd));
 
@@ -78,10 +85,9 @@ again:
 
        FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v));
 
-       /* Expect at least zero_size/CACHELINE_SIZE misses per iteration */
-       if (v.l1d_misses >= (iterations * zero_size / CACHELINE_SIZE) && rfi_flush)
+       if (rfi_flush && v.l1d_misses >= l1d_misses_expected)
                passes++;
-       else if (v.l1d_misses < iterations && !rfi_flush)
+       else if (!rfi_flush && v.l1d_misses < (l1d_misses_expected / 2))
                passes++;
 
        l1d_misses_total += v.l1d_misses;
@@ -92,13 +98,15 @@ again:
        if (passes < repetitions) {
                printf("FAIL (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d failures]\n",
                       rfi_flush, l1d_misses_total, rfi_flush ? '<' : '>',
-                      rfi_flush ? (repetitions * iterations * zero_size / CACHELINE_SIZE) : iterations,
+                      rfi_flush ? repetitions * l1d_misses_expected :
+                      repetitions * l1d_misses_expected / 2,
                       repetitions - passes, repetitions);
                rc = 1;
        } else
                printf("PASS (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d pass]\n",
                       rfi_flush, l1d_misses_total, rfi_flush ? '>' : '<',
-                      rfi_flush ? (repetitions * iterations * zero_size / CACHELINE_SIZE) : iterations,
+                      rfi_flush ? repetitions * l1d_misses_expected :
+                      repetitions * l1d_misses_expected / 2,
                       passes, repetitions);
 
        if (rfi_flush == rfi_flush_org) {
index 1fca25c6ace067ffb7a913508b4e13059cb04770..209a958dca127689bccd1ce8f03cde64237c1872 100644 (file)
@@ -1,15 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := signal signal_tm
-
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c ../utils.c signal.S
+TEST_GEN_PROGS := signal signal_tm
 
 CFLAGS += -maltivec
-signal_tm: CFLAGS += -mhtm
+$(OUTPUT)/signal_tm: CFLAGS += -mhtm
 
 top_srcdir = ../../../../..
 include ../../lib.mk
 
-clean:
-       rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c ../utils.c signal.S
index fcd2dcb8972babf90209b699307bd086f08c5f90..bdc081afedb0f0788c26ad4a6914e7895b9be90a 100644 (file)
@@ -8,6 +8,7 @@ EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S
 top_srcdir = ../../../../..
 include ../../lib.mk
 
+$(OUTPUT)/switch_endian_test: ASFLAGS += -I $(OUTPUT)
 $(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S
 
 $(OUTPUT)/check-reversed.o: $(OUTPUT)/check.o
index 43c342845be0ee1326214319ce8dabb64a958e94..ed62f4153d3eb58e3b0c426e6ecb14a666789a96 100644 (file)
@@ -25,7 +25,6 @@
 #include "utils.h"
 
 static char auxv[4096];
-extern unsigned int dscr_insn[];
 
 int read_auxv(char *buf, ssize_t buf_size)
 {
@@ -247,7 +246,8 @@ static void sigill_handler(int signr, siginfo_t *info, void *unused)
        ucontext_t *ctx = (ucontext_t *)unused;
        unsigned long *pc = &UCONTEXT_NIA(ctx);
 
-       if (*pc == (unsigned long)&dscr_insn) {
+       /* mtspr 3,RS to check for move to DSCR below */
+       if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
                if (!warned++)
                        printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
                *pc += 4;
@@ -271,5 +271,5 @@ void set_dscr(unsigned long val)
                init = 1;
        }
 
-       asm volatile("dscr_insn: mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+       asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
 }